crash_gen.py 90.0 KB
Newer Older
S
Shuduo Sang 已提交
1
# -----!/usr/bin/python3.7
S
Steven Li 已提交
2 3 4 5 6 7 8 9 10 11 12 13
###################################################################
#           Copyright (c) 2016 by TAOS Technologies, Inc.
#                     All rights reserved.
#
#  This file is proprietary and confidential to TAOS Technologies.
#  No part of this file may be reproduced, stored, transmitted,
#  disclosed or used in any form or by any means other than as
#  expressly provided by the written permission from Jianhui Tao
#
###################################################################

# -*- coding: utf-8 -*-
S
Shuduo Sang 已提交
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
# For type hinting before definition, ref:
# https://stackoverflow.com/questions/33533148/how-do-i-specify-that-the-return-type-of-a-method-is-the-same-as-the-class-itsel
from __future__ import annotations
import taos
import crash_gen
from util.sql import *
from util.cases import *
from util.dnodes import *
from util.log import *
from queue import Queue, Empty
from typing import IO
from typing import Set
from typing import Dict
from typing import List
from requests.auth import HTTPBasicAuth
import textwrap
import datetime
import logging
import time
import random
import threading
import requests
import copy
import argparse
import getopt
39

S
Steven Li 已提交
40
import sys
41
import os
42 43
import io
import signal
44
import traceback
45 46 47 48
# Require Python 3
if sys.version_info[0] < 3:
    raise Exception("Must be using Python 3")

S
Steven Li 已提交
49

S
Shuduo Sang 已提交
50
# Global variables, tried to keep a small number.
51 52 53

# Command-line/Environment Configurations, will set a bit later
# ConfigNameSpace = argparse.Namespace
S
Shuduo Sang 已提交
54
gConfig = argparse.Namespace()  # Dummy value, will be replaced later
55
logger = None
S
Steven Li 已提交
56

S
Shuduo Sang 已提交
57 58

def runThread(wt: WorkerThread):
59
    wt.run()
60

S
Shuduo Sang 已提交
61

62 63
class CrashGenError(Exception):
    def __init__(self, msg=None, errno=None):
S
Shuduo Sang 已提交
64
        self.msg = msg
65
        self.errno = errno
S
Shuduo Sang 已提交
66

67 68 69
    def __str__(self):
        return self.msg

S
Shuduo Sang 已提交
70

S
Steven Li 已提交
71
class WorkerThread:
S
Shuduo Sang 已提交
72 73 74 75 76
    def __init__(self, pool: ThreadPool, tid,
                 tc: ThreadCoordinator,
                 # te: TaskExecutor,
                 ):  # note: main thread context!
        # self._curStep = -1
77
        self._pool = pool
S
Shuduo Sang 已提交
78 79
        self._tid = tid
        self._tc = tc  # type: ThreadCoordinator
S
Steven Li 已提交
80
        # self.threadIdent = threading.get_ident()
81 82
        self._thread = threading.Thread(target=runThread, args=(self,))
        self._stepGate = threading.Event()
S
Steven Li 已提交
83

84
        # Let us have a DB connection of our own
S
Shuduo Sang 已提交
85
        if (gConfig.per_thread_db_connection):  # type: ignore
86
            # print("connector_type = {}".format(gConfig.connector_type))
S
Shuduo Sang 已提交
87 88
            self._dbConn = DbConn.createNative() if (
                gConfig.connector_type == 'native') else DbConn.createRest()
89

S
Shuduo Sang 已提交
90
        self._dbInUse = False  # if "use db" was executed already
91

92
    def logDebug(self, msg):
S
Steven Li 已提交
93
        logger.debug("    TRD[{}] {}".format(self._tid, msg))
94 95

    def logInfo(self, msg):
S
Steven Li 已提交
96
        logger.info("    TRD[{}] {}".format(self._tid, msg))
97

98 99 100 101
    def dbInUse(self):
        return self._dbInUse

    def useDb(self):
S
Shuduo Sang 已提交
102
        if (not self._dbInUse):
103 104 105
            self.execSql("use db")
        self._dbInUse = True

106
    def getTaskExecutor(self):
S
Shuduo Sang 已提交
107
        return self._tc.getTaskExecutor()
108

S
Steven Li 已提交
109
    def start(self):
110
        self._thread.start()  # AFTER the thread is recorded
S
Steven Li 已提交
111

S
Shuduo Sang 已提交
112
    def run(self):
S
Steven Li 已提交
113
        # initialization after thread starts, in the thread context
114
        # self.isSleeping = False
115 116
        logger.info("Starting to run thread: {}".format(self._tid))

S
Shuduo Sang 已提交
117
        if (gConfig.per_thread_db_connection):  # type: ignore
118
            logger.debug("Worker thread openning database connection")
119
            self._dbConn.open()
S
Steven Li 已提交
120

S
Shuduo Sang 已提交
121 122
        self._doTaskLoop()

123
        # clean up
S
Shuduo Sang 已提交
124
        if (gConfig.per_thread_db_connection):  # type: ignore
125
            self._dbConn.close()
126

S
Shuduo Sang 已提交
127
    def _doTaskLoop(self):
128 129
        # while self._curStep < self._pool.maxSteps:
        # tc = ThreadCoordinator(None)
S
Shuduo Sang 已提交
130 131
        while True:
            tc = self._tc  # Thread Coordinator, the overall master
132
            tc.crossStepBarrier()  # shared barrier first, INCLUDING the last one
S
Shuduo Sang 已提交
133 134 135
            logger.debug(
                "[TRD] Worker thread [{}] exited barrier...".format(
                    self._tid))
136
            self.crossStepGate()   # then per-thread gate, after being tapped
S
Shuduo Sang 已提交
137 138 139
            logger.debug(
                "[TRD] Worker thread [{}] exited step gate...".format(
                    self._tid))
140
            if not self._tc.isRunning():
S
Shuduo Sang 已提交
141 142
                logger.debug(
                    "[TRD] Thread Coordinator not running any more, worker thread now stopping...")
143 144
                break

145
            # Fetch a task from the Thread Coordinator
S
Shuduo Sang 已提交
146 147 148
            logger.debug(
                "[TRD] Worker thread [{}] about to fetch task".format(
                    self._tid))
149
            task = tc.fetchTask()
150 151

            # Execute such a task
S
Shuduo Sang 已提交
152 153 154
            logger.debug(
                "[TRD] Worker thread [{}] about to execute task: {}".format(
                    self._tid, task.__class__.__name__))
155
            task.execute(self)
156
            tc.saveExecutedTask(task)
S
Shuduo Sang 已提交
157 158 159 160 161
            logger.debug(
                "[TRD] Worker thread [{}] finished executing task".format(
                    self._tid))

            self._dbInUse = False  # there may be changes between steps
162

S
Shuduo Sang 已提交
163 164
    def verifyThreadSelf(self):  # ensure we are called by this own thread
        if (threading.get_ident() != self._thread.ident):
S
Steven Li 已提交
165 166
            raise RuntimeError("Unexpectly called from other threads")

S
Shuduo Sang 已提交
167 168
    def verifyThreadMain(self):  # ensure we are called by the main thread
        if (threading.get_ident() != threading.main_thread().ident):
S
Steven Li 已提交
169 170 171
            raise RuntimeError("Unexpectly called from other threads")

    def verifyThreadAlive(self):
S
Shuduo Sang 已提交
172
        if (not self._thread.is_alive()):
S
Steven Li 已提交
173 174
            raise RuntimeError("Unexpected dead thread")

175
    # A gate is different from a barrier in that a thread needs to be "tapped"
S
Steven Li 已提交
176 177
    def crossStepGate(self):
        self.verifyThreadAlive()
S
Shuduo Sang 已提交
178 179
        self.verifyThreadSelf()  # only allowed by ourselves

180
        # Wait again at the "gate", waiting to be "tapped"
S
Shuduo Sang 已提交
181 182 183 184
        logger.debug(
            "[TRD] Worker thread {} about to cross the step gate".format(
                self._tid))
        self._stepGate.wait()
185
        self._stepGate.clear()
S
Shuduo Sang 已提交
186

187
        # self._curStep += 1  # off to a new step...
S
Steven Li 已提交
188

S
Shuduo Sang 已提交
189
    def tapStepGate(self):  # give it a tap, release the thread waiting there
S
Steven Li 已提交
190
        self.verifyThreadAlive()
S
Shuduo Sang 已提交
191 192
        self.verifyThreadMain()  # only allowed for main thread

S
Steven Li 已提交
193
        logger.debug("[TRD] Tapping worker thread {}".format(self._tid))
S
Shuduo Sang 已提交
194 195
        self._stepGate.set()  # wake up!
        time.sleep(0)  # let the released thread run a bit
196

S
Shuduo Sang 已提交
197 198 199
    def execSql(self, sql):  # TODO: expose DbConn directly
        if (gConfig.per_thread_db_connection):
            return self._dbConn.execute(sql)
200
        else:
201
            return self._tc.getDbManager().getDbConn().execute(sql)
202

S
Shuduo Sang 已提交
203 204 205
    def querySql(self, sql):  # TODO: expose DbConn directly
        if (gConfig.per_thread_db_connection):
            return self._dbConn.query(sql)
206 207 208 209
        else:
            return self._tc.getDbManager().getDbConn().query(sql)

    def getQueryResult(self):
S
Shuduo Sang 已提交
210 211
        if (gConfig.per_thread_db_connection):
            return self._dbConn.getQueryResult()
212 213 214
        else:
            return self._tc.getDbManager().getDbConn().getQueryResult()

215
    def getDbConn(self):
S
Shuduo Sang 已提交
216 217
        if (gConfig.per_thread_db_connection):
            return self._dbConn
218
        else:
219
            return self._tc.getDbManager().getDbConn()
220

221 222
    # def querySql(self, sql): # not "execute", since we are out side the DB context
    #     if ( gConfig.per_thread_db_connection ):
S
Shuduo Sang 已提交
223
    #         return self._dbConn.query(sql)
224 225
    #     else:
    #         return self._tc.getDbState().getDbConn().query(sql)
226

227
# The coordinator of all worker threads, mostly running in main thread
S
Shuduo Sang 已提交
228 229


230
class ThreadCoordinator:
231
    def __init__(self, pool: ThreadPool, dbManager):
S
Shuduo Sang 已提交
232
        self._curStep = -1  # first step is 0
233
        self._pool = pool
234
        # self._wd = wd
S
Shuduo Sang 已提交
235
        self._te = None  # prepare for every new step
236
        self._dbManager = dbManager
S
Shuduo Sang 已提交
237 238
        self._executedTasks: List[Task] = []  # in a given step
        self._lock = threading.RLock()  # sync access for a few things
S
Steven Li 已提交
239

S
Shuduo Sang 已提交
240 241
        self._stepBarrier = threading.Barrier(
            self._pool.numThreads + 1)  # one barrier for all threads
242
        self._execStats = ExecutionStats()
243
        self._runStatus = MainExec.STATUS_RUNNING
S
Steven Li 已提交
244

245 246 247
    def getTaskExecutor(self):
        return self._te

S
Shuduo Sang 已提交
248
    def getDbManager(self) -> DbManager:
249
        return self._dbManager
250

251 252 253
    def crossStepBarrier(self):
        self._stepBarrier.wait()

254 255 256 257
    def requestToStop(self):
        self._runStatus = MainExec.STATUS_STOPPING
        self._execStats.registerFailure("User Interruption")

S
Shuduo Sang 已提交
258
    def run(self):
259
        self._pool.createAndStartThreads(self)
S
Steven Li 已提交
260 261

        # Coordinate all threads step by step
S
Shuduo Sang 已提交
262 263 264
        self._curStep = -1  # not started yet
        maxSteps = gConfig.max_steps  # type: ignore
        self._execStats.startExec()  # start the stop watch
265 266
        transitionFailed = False
        hasAbortedTask = False
S
Shuduo Sang 已提交
267 268 269 270 271 272 273 274
        while(self._curStep < maxSteps - 1 and
              (not transitionFailed) and
              (self._runStatus == MainExec.STATUS_RUNNING) and
                (not hasAbortedTask)):  # maxStep==10, last curStep should be 9

            if not gConfig.debug:
                # print this only if we are not in debug mode
                print(".", end="", flush=True)
S
Steven Li 已提交
275
            logger.debug("[TRD] Main thread going to sleep")
276

277
            # Now main thread (that's us) is ready to enter a step
S
Shuduo Sang 已提交
278 279 280 281
            # let other threads go past the pool barrier, but wait at the
            # thread gate
            self.crossStepBarrier()
            self._stepBarrier.reset()  # Other worker threads should now be at the "gate"
282 283

            # At this point, all threads should be pass the overall "barrier" and before the per-thread "gate"
S
Shuduo Sang 已提交
284 285
            # We use this period to do house keeping work, when all worker
            # threads are QUIET.
286
            hasAbortedTask = False
S
Shuduo Sang 已提交
287 288
            for task in self._executedTasks:
                if task.isAborted():
289 290 291 292
                    print("Task aborted: {}".format(task))
                    hasAbortedTask = True
                    break

S
Shuduo Sang 已提交
293
            if hasAbortedTask:  # do transition only if tasks are error free
294
                self._execStats.registerFailure("Aborted Task Encountered")
S
Shuduo Sang 已提交
295
            else:
296 297 298
                try:
                    sm = self._dbManager.getStateMachine()
                    logger.debug("[STT] starting transitions")
S
Shuduo Sang 已提交
299 300
                    # at end of step, transiton the DB state
                    sm.transition(self._executedTasks)
301
                    logger.debug("[STT] transition ended")
S
Shuduo Sang 已提交
302 303 304
                    # Due to limitation (or maybe not) of the Python library,
                    # we cannot share connections across threads
                    if sm.hasDatabase():
305 306 307
                        for t in self._pool.threadList:
                            logger.debug("[DB] use db for all worker threads")
                            t.useDb()
S
Shuduo Sang 已提交
308 309
                            # t.execSql("use db") # main thread executing "use
                            # db" on behalf of every worker thread
310
                except taos.error.ProgrammingError as err:
S
Shuduo Sang 已提交
311
                    if (err.msg == 'network unavailable'):  # broken DB connection
312 313 314
                        logger.info("DB connection broken, execution failed")
                        traceback.print_stack()
                        transitionFailed = True
S
Shuduo Sang 已提交
315
                        self._te = None  # Not running any more
316
                        self._execStats.registerFailure("Broken DB Connection")
S
Shuduo Sang 已提交
317 318
                        # continue # don't do that, need to tap all threads at
                        # end, and maybe signal them to stop
319
                    else:
S
Shuduo Sang 已提交
320
                        raise
321 322
            # finally:
            #     pass
S
Shuduo Sang 已提交
323 324

            self.resetExecutedTasks()  # clear the tasks after we are done
325 326

            # Get ready for next step
S
Steven Li 已提交
327
            logger.debug("<-- Step {} finished".format(self._curStep))
S
Shuduo Sang 已提交
328 329 330 331
            self._curStep += 1  # we are about to get into next step. TODO: race condition here!
            # Now not all threads had time to go to sleep
            logger.debug(
                "\r\n\n--> Step {} starts with main thread waking up".format(self._curStep))
332

333
            # A new TE for the new step
S
Shuduo Sang 已提交
334
            if not transitionFailed:  # only if not failed
335
                self._te = TaskExecutor(self._curStep)
336

S
Shuduo Sang 已提交
337 338 339 340 341 342
            logger.debug(
                "[TRD] Main thread waking up at step {}, tapping worker threads".format(
                    self._curStep))  # Now not all threads had time to go to sleep
            # Worker threads will wake up at this point, and each execute it's
            # own task
            self.tapAllThreads()
S
Steven Li 已提交
343

344
        logger.debug("Main thread ready to finish up...")
S
Shuduo Sang 已提交
345 346
        if not transitionFailed:  # only in regular situations
            self.crossStepBarrier()  # Cross it one last time, after all threads finish
347 348
            self._stepBarrier.reset()
            logger.debug("Main thread in exclusive zone...")
S
Shuduo Sang 已提交
349
            self._te = None  # No more executor, time to end
350
            logger.debug("Main thread tapping all threads one last time...")
S
Shuduo Sang 已提交
351
            self.tapAllThreads()  # Let the threads run one last time
352

353
        logger.debug("Main thread joining all threads")
S
Shuduo Sang 已提交
354
        self._pool.joinAll()  # Get all threads to finish
355
        logger.info("\nAll worker threads finished")
356 357
        self._execStats.endExec()

358 359
    def printStats(self):
        self._execStats.printStats()
S
Steven Li 已提交
360

S
Steven Li 已提交
361 362 363 364 365 366
    def isFailed(self):
        return self._execStats.isFailed()

    def getExecStats(self):
        return self._execStats

S
Shuduo Sang 已提交
367
    def tapAllThreads(self):  # in a deterministic manner
S
Steven Li 已提交
368
        wakeSeq = []
S
Shuduo Sang 已提交
369 370
        for i in range(self._pool.numThreads):  # generate a random sequence
            if Dice.throw(2) == 1:
S
Steven Li 已提交
371 372 373
                wakeSeq.append(i)
            else:
                wakeSeq.insert(0, i)
S
Shuduo Sang 已提交
374 375 376
        logger.debug(
            "[TRD] Main thread waking up worker threads: {}".format(
                str(wakeSeq)))
377
        # TODO: set dice seed to a deterministic value
S
Steven Li 已提交
378
        for i in wakeSeq:
S
Shuduo Sang 已提交
379 380 381
            # TODO: maybe a bit too deep?!
            self._pool.threadList[i].tapStepGate()
            time.sleep(0)  # yield
S
Steven Li 已提交
382

383
    def isRunning(self):
S
Shuduo Sang 已提交
384
        return self._te is not None
385

S
Shuduo Sang 已提交
386 387
    def fetchTask(self) -> Task:
        if (not self.isRunning()):  # no task
388
            raise RuntimeError("Cannot fetch task when not running")
389 390
        # return self._wd.pickTask()
        # Alternatively, let's ask the DbState for the appropriate task
391 392 393 394 395 396 397
        # dbState = self.getDbState()
        # tasks = dbState.getTasksAtState() # TODO: create every time?
        # nTasks = len(tasks)
        # i = Dice.throw(nTasks)
        # logger.debug(" (dice:{}/{}) ".format(i, nTasks))
        # # return copy.copy(tasks[i]) # Needs a fresh copy, to save execution results, etc.
        # return tasks[i].clone() # TODO: still necessary?
S
Shuduo Sang 已提交
398 399 400 401 402
        # pick a task type for current state
        taskType = self.getDbManager().getStateMachine().pickTaskType()
        return taskType(
            self.getDbManager(),
            self._execStats)  # create a task from it
403 404

    def resetExecutedTasks(self):
S
Shuduo Sang 已提交
405
        self._executedTasks = []  # should be under single thread
406 407 408 409

    def saveExecutedTask(self, task):
        with self._lock:
            self._executedTasks.append(task)
410 411

# We define a class to run a number of threads in locking steps.
S
Shuduo Sang 已提交
412 413


414
class ThreadPool:
415
    def __init__(self, numThreads, maxSteps):
416 417 418 419
        self.numThreads = numThreads
        self.maxSteps = maxSteps
        # Internal class variables
        self.curStep = 0
S
Shuduo Sang 已提交
420 421
        self.threadList = []  # type: List[WorkerThread]

422
    # starting to run all the threads, in locking steps
423
    def createAndStartThreads(self, tc: ThreadCoordinator):
S
Shuduo Sang 已提交
424 425
        for tid in range(0, self.numThreads):  # Create the threads
            workerThread = WorkerThread(self, tid, tc)
426
            self.threadList.append(workerThread)
S
Shuduo Sang 已提交
427
            workerThread.start()  # start, but should block immediately before step 0
428 429 430 431 432 433

    def joinAll(self):
        for workerThread in self.threadList:
            logger.debug("Joining thread...")
            workerThread._thread.join()

434 435
# A queue of continguous POSITIVE integers, used by DbManager to generate continuous numbers
# for new table names
S
Shuduo Sang 已提交
436 437


S
Steven Li 已提交
438 439
class LinearQueue():
    def __init__(self):
440
        self.firstIndex = 1  # 1st ever element
S
Steven Li 已提交
441
        self.lastIndex = 0
S
Shuduo Sang 已提交
442 443
        self._lock = threading.RLock()  # our functions may call each other
        self.inUse = set()  # the indexes that are in use right now
S
Steven Li 已提交
444

445
    def toText(self):
S
Shuduo Sang 已提交
446 447
        return "[{}..{}], in use: {}".format(
            self.firstIndex, self.lastIndex, self.inUse)
448 449

    # Push (add new element, largest) to the tail, and mark it in use
S
Shuduo Sang 已提交
450
    def push(self):
451
        with self._lock:
S
Shuduo Sang 已提交
452 453
            # if ( self.isEmpty() ):
            #     self.lastIndex = self.firstIndex
454
            #     return self.firstIndex
455 456
            # Otherwise we have something
            self.lastIndex += 1
457 458
            self.allocate(self.lastIndex)
            # self.inUse.add(self.lastIndex) # mark it in use immediately
459
            return self.lastIndex
S
Steven Li 已提交
460 461

    def pop(self):
462
        with self._lock:
S
Shuduo Sang 已提交
463 464 465 466
            if (self.isEmpty()):
                # raise RuntimeError("Cannot pop an empty queue")
                return False  # TODO: None?

467
            index = self.firstIndex
S
Shuduo Sang 已提交
468
            if (index in self.inUse):
469 470
                return False

471 472 473 474 475 476 477
            self.firstIndex += 1
            return index

    def isEmpty(self):
        return self.firstIndex > self.lastIndex

    def popIfNotEmpty(self):
478
        with self._lock:
479 480 481 482
            if (self.isEmpty()):
                return 0
            return self.pop()

S
Steven Li 已提交
483
    def allocate(self, i):
484
        with self._lock:
485
            # logger.debug("LQ allocating item {}".format(i))
S
Shuduo Sang 已提交
486 487 488
            if (i in self.inUse):
                raise RuntimeError(
                    "Cannot re-use same index in queue: {}".format(i))
489 490
            self.inUse.add(i)

S
Steven Li 已提交
491
    def release(self, i):
492
        with self._lock:
493
            # logger.debug("LQ releasing item {}".format(i))
S
Shuduo Sang 已提交
494
            self.inUse.remove(i)  # KeyError possible, TODO: why?
495 496 497 498

    def size(self):
        return self.lastIndex + 1 - self.firstIndex

S
Steven Li 已提交
499
    def pickAndAllocate(self):
S
Shuduo Sang 已提交
500
        if (self.isEmpty()):
501 502
            return None
        with self._lock:
S
Shuduo Sang 已提交
503
            cnt = 0  # counting the interations
504 505
            while True:
                cnt += 1
S
Shuduo Sang 已提交
506
                if (cnt > self.size() * 10):  # 10x iteration already
507 508
                    # raise RuntimeError("Failed to allocate LinearQueue element")
                    return None
S
Shuduo Sang 已提交
509 510
                ret = Dice.throwRange(self.firstIndex, self.lastIndex + 1)
                if (ret not in self.inUse):
511 512 513
                    self.allocate(ret)
                    return ret

S
Shuduo Sang 已提交
514

515
class DbConn:
516 517 518 519 520 521 522 523 524 525 526
    TYPE_NATIVE = "native-c"
    TYPE_REST = "rest-api"
    TYPE_INVALID = "invalid"

    @classmethod
    def create(cls, connType):
        if connType == cls.TYPE_NATIVE:
            return DbConnNative()
        elif connType == cls.TYPE_REST:
            return DbConnRest()
        else:
S
Shuduo Sang 已提交
527 528
            raise RuntimeError(
                "Unexpected connection type: {}".format(connType))
529 530 531 532 533 534 535 536 537

    @classmethod
    def createNative(cls):
        return cls.create(cls.TYPE_NATIVE)

    @classmethod
    def createRest(cls):
        return cls.create(cls.TYPE_REST)

538 539
    def __init__(self):
        self.isOpen = False
540 541 542
        self._type = self.TYPE_INVALID

    def open(self):
S
Shuduo Sang 已提交
543
        if (self.isOpen):
544 545
            raise RuntimeError("Cannot re-open an existing DB connection")

546 547
        # below implemented by child classes
        self.openByType()
548

S
Shuduo Sang 已提交
549 550 551
        logger.debug(
            "[DB] data connection opened, type = {}".format(
                self._type))
552 553
        self.isOpen = True

S
Shuduo Sang 已提交
554 555 556 557
    def resetDb(self):  # reset the whole database, etc.
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot reset database until connection is open")
558 559
        # self._tdSql.prepare() # Recreate database, etc.

560
        self.execute('drop database if exists db')
561 562
        logger.debug("Resetting DB, dropped database")
        # self._cursor.execute('create database db')
563
        # self._cursor.execute('use db')
564 565
        # tdSql.execute('show databases')

S
Shuduo Sang 已提交
566
    def queryScalar(self, sql) -> int:
567 568
        return self._queryAny(sql)

S
Shuduo Sang 已提交
569
    def queryString(self, sql) -> str:
570 571
        return self._queryAny(sql)

S
Shuduo Sang 已提交
572 573 574 575
    def _queryAny(self, sql):  # actual query result as an int
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot query database until connection is open")
576
        nRows = self.query(sql)
S
Shuduo Sang 已提交
577 578 579 580
        if nRows != 1:
            raise RuntimeError(
                "Unexpected result for query: {}, rows = {}".format(
                    sql, nRows))
581
        if self.getResultRows() != 1 or self.getResultCols() != 1:
S
Shuduo Sang 已提交
582 583
            raise RuntimeError(
                "Unexpected result set for query: {}".format(sql))
584 585 586 587
        return self.getQueryResult()[0][0]

    def execute(self, sql):
        raise RuntimeError("Unexpected execution, should be overriden")
S
Shuduo Sang 已提交
588

589 590
    def openByType(self):
        raise RuntimeError("Unexpected execution, should be overriden")
S
Shuduo Sang 已提交
591

592 593
    def getQueryResult(self):
        raise RuntimeError("Unexpected execution, should be overriden")
S
Shuduo Sang 已提交
594

595 596
    def getResultRows(self):
        raise RuntimeError("Unexpected execution, should be overriden")
S
Shuduo Sang 已提交
597

598 599 600 601
    def getResultCols(self):
        raise RuntimeError("Unexpected execution, should be overriden")

# Sample: curl -u root:taosdata -d "show databases" localhost:6020/rest/sql
S
Shuduo Sang 已提交
602 603


604 605 606 607
class DbConnRest(DbConn):
    def __init__(self):
        super().__init__()
        self._type = self.TYPE_REST
S
Shuduo Sang 已提交
608
        self._url = "http://localhost:6020/rest/sql"  # fixed for now
609 610
        self._result = None

S
Shuduo Sang 已提交
611 612 613
    def openByType(self):  # Open connection
        pass  # do nothing, always open

614
    def close(self):
S
Shuduo Sang 已提交
615 616 617
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot clean up database until connection is open")
618 619 620 621 622
        # Do nothing for REST
        logger.debug("[DB] REST Database connection closed")
        self.isOpen = False

    def _doSql(self, sql):
S
Shuduo Sang 已提交
623 624 625
        r = requests.post(self._url,
                          data=sql,
                          auth=HTTPBasicAuth('root', 'taosdata'))
626 627
        rj = r.json()
        # Sanity check for the "Json Result"
S
Shuduo Sang 已提交
628
        if ('status' not in rj):
629 630
            raise RuntimeError("No status in REST response")

S
Shuduo Sang 已提交
631 632 633 634
        if rj['status'] == 'error':  # clearly reported error
            if ('code' not in rj):  # error without code
                raise RuntimeError("REST error return without code")
            errno = rj['code']  # May need to massage this in the future
635
            # print("Raising programming error with REST return: {}".format(rj))
S
Shuduo Sang 已提交
636 637
            raise taos.error.ProgrammingError(
                rj['desc'], errno)  # todo: check existance of 'desc'
638

S
Shuduo Sang 已提交
639 640 641 642
        if rj['status'] != 'succ':  # better be this
            raise RuntimeError(
                "Unexpected REST return status: {}".format(
                    rj['status']))
643 644

        nRows = rj['rows'] if ('rows' in rj) else 0
S
Shuduo Sang 已提交
645
        self._result = rj
646 647
        return nRows

S
Shuduo Sang 已提交
648 649 650 651
    def execute(self, sql):
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot execute database commands until connection is open")
652 653
        logger.debug("[SQL-REST] Executing SQL: {}".format(sql))
        nRows = self._doSql(sql)
S
Shuduo Sang 已提交
654 655
        logger.debug(
            "[SQL-REST] Execution Result, nRows = {}, SQL = {}".format(nRows, sql))
656 657
        return nRows

S
Shuduo Sang 已提交
658
    def query(self, sql):  # return rows affected
659 660 661 662 663 664 665 666 667 668 669 670 671
        return self.execute(sql)

    def getQueryResult(self):
        return self._result['data']

    def getResultRows(self):
        print(self._result)
        raise RuntimeError("TBD")
        # return self._tdSql.queryRows

    def getResultCols(self):
        print(self._result)
        raise RuntimeError("TBD")
S
Shuduo Sang 已提交
672

673
    # Duplicate code from TDMySQL, TODO: merge all this into DbConnNative
674 675


676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703
class MyTDSql:
    def __init__(self):
        self.queryRows = 0
        self.queryCols = 0
        self.affectedRows = 0

    def init(self, cursor, log=True):
        self.cursor = cursor
        # if (log):
        #     caller = inspect.getframeinfo(inspect.stack()[1][0])
        #     self.cursor.log(caller.filename + ".sql")

    def close(self):
        self.cursor.close()

    def query(self, sql):
        self.sql = sql
        try:
            self.cursor.execute(sql)
            self.queryResult = self.cursor.fetchall()
            self.queryRows = len(self.queryResult)
            self.queryCols = len(self.cursor.description)
        except Exception as e:
            # caller = inspect.getframeinfo(inspect.stack()[1][0])
            # args = (caller.filename, caller.lineno, sql, repr(e))
            # tdLog.exit("%s(%d) failed: sql:%s, %s" % args)
            raise
        return self.queryRows
704

705 706 707 708 709 710 711 712 713 714 715
    def execute(self, sql):
        self.sql = sql
        try:
            self.affectedRows = self.cursor.execute(sql)
        except Exception as e:
            # caller = inspect.getframeinfo(inspect.stack()[1][0])
            # args = (caller.filename, caller.lineno, sql, repr(e))
            # tdLog.exit("%s(%d) failed: sql:%s, %s" % args)
            raise
        return self.affectedRows

S
Shuduo Sang 已提交
716

717 718 719 720
class DbConnNative(DbConn):
    def __init__(self):
        super().__init__()
        self._type = self.TYPE_REST
S
Shuduo Sang 已提交
721
        self._conn = None
722
        self._cursor = None
S
Shuduo Sang 已提交
723

724 725 726 727 728 729 730 731 732 733 734
    def getBuildPath(self):
        selfPath = os.path.dirname(os.path.realpath(__file__))
        if ("community" in selfPath):
            projPath = selfPath[:selfPath.find("communit")]
        else:
            projPath = selfPath[:selfPath.find("tests")]

        for root, dirs, files in os.walk(projPath):
            if ("taosd" in files):
                rootRealPath = os.path.dirname(os.path.realpath(root))
                if ("packaging" not in rootRealPath):
S
Shuduo Sang 已提交
735
                    buildPath = root[:len(root) - len("/build/bin")]
736 737 738
                    break
        return buildPath

S
Shuduo Sang 已提交
739
    def openByType(self):  # Open connection
740
        cfgPath = self.getBuildPath() + "/test/cfg"
S
Shuduo Sang 已提交
741 742 743
        self._conn = taos.connect(
            host="127.0.0.1",
            config=cfgPath)  # TODO: make configurable
744 745 746 747
        self._cursor = self._conn.cursor()

        # Get the connection/cursor ready
        self._cursor.execute('reset query cache')
S
Shuduo Sang 已提交
748 749
        # self._cursor.execute('use db') # do this at the beginning of every
        # step
750 751

        # Open connection
752
        self._tdSql = MyTDSql()
753
        self._tdSql.init(self._cursor)
S
Shuduo Sang 已提交
754

755
    def close(self):
S
Shuduo Sang 已提交
756 757 758
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot clean up database until connection is open")
759
        self._tdSql.close()
760
        logger.debug("[DB] Database connection closed")
761
        self.isOpen = False
S
Steven Li 已提交
762

S
Shuduo Sang 已提交
763 764 765 766
    def execute(self, sql):
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot execute database commands until connection is open")
767 768
        logger.debug("[SQL] Executing SQL: {}".format(sql))
        nRows = self._tdSql.execute(sql)
S
Shuduo Sang 已提交
769 770 771
        logger.debug(
            "[SQL] Execution Result, nRows = {}, SQL = {}".format(
                nRows, sql))
772
        return nRows
S
Steven Li 已提交
773

S
Shuduo Sang 已提交
774 775 776 777
    def query(self, sql):  # return rows affected
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot query database until connection is open")
778 779
        logger.debug("[SQL] Executing SQL: {}".format(sql))
        nRows = self._tdSql.query(sql)
S
Shuduo Sang 已提交
780 781 782
        logger.debug(
            "[SQL] Query Result, nRows = {}, SQL = {}".format(
                nRows, sql))
783
        return nRows
784
        # results are in: return self._tdSql.queryResult
785

786 787 788
    def getQueryResult(self):
        return self._tdSql.queryResult

789 790
    def getResultRows(self):
        return self._tdSql.queryRows
791

792 793
    def getResultCols(self):
        return self._tdSql.queryCols
794

S
Shuduo Sang 已提交
795

796
class AnyState:
S
Shuduo Sang 已提交
797 798 799
    STATE_INVALID = -1
    STATE_EMPTY = 0  # nothing there, no even a DB
    STATE_DB_ONLY = 1  # we have a DB, but nothing else
800
    STATE_TABLE_ONLY = 2  # we have a table, but totally empty
S
Shuduo Sang 已提交
801
    STATE_HAS_DATA = 3  # we have some data in the table
802 803 804 805 806
    _stateNames = ["Invalid", "Empty", "DB_Only", "Table_Only", "Has_Data"]

    STATE_VAL_IDX = 0
    CAN_CREATE_DB = 1
    CAN_DROP_DB = 2
807 808
    CAN_CREATE_FIXED_SUPER_TABLE = 3
    CAN_DROP_FIXED_SUPER_TABLE = 4
809 810 811 812 813 814 815
    CAN_ADD_DATA = 5
    CAN_READ_DATA = 6

    def __init__(self):
        self._info = self.getInfo()

    def __str__(self):
S
Shuduo Sang 已提交
816 817
        # -1 hack to accomodate the STATE_INVALID case
        return self._stateNames[self._info[self.STATE_VAL_IDX] + 1]
818 819 820 821

    def getInfo(self):
        raise RuntimeError("Must be overriden by child classes")

S
Steven Li 已提交
822 823 824 825 826 827
    def equals(self, other):
        if isinstance(other, int):
            return self.getValIndex() == other
        elif isinstance(other, AnyState):
            return self.getValIndex() == other.getValIndex()
        else:
S
Shuduo Sang 已提交
828 829 830
            raise RuntimeError(
                "Unexpected comparison, type = {}".format(
                    type(other)))
S
Steven Li 已提交
831

832 833 834
    def verifyTasksToState(self, tasks, newState):
        raise RuntimeError("Must be overriden by child classes")

S
Steven Li 已提交
835 836 837
    def getValIndex(self):
        return self._info[self.STATE_VAL_IDX]

838 839
    def getValue(self):
        return self._info[self.STATE_VAL_IDX]
S
Shuduo Sang 已提交
840

841 842
    def canCreateDb(self):
        return self._info[self.CAN_CREATE_DB]
S
Shuduo Sang 已提交
843

844 845
    def canDropDb(self):
        return self._info[self.CAN_DROP_DB]
S
Shuduo Sang 已提交
846

847 848
    def canCreateFixedSuperTable(self):
        return self._info[self.CAN_CREATE_FIXED_SUPER_TABLE]
S
Shuduo Sang 已提交
849

850 851
    def canDropFixedSuperTable(self):
        return self._info[self.CAN_DROP_FIXED_SUPER_TABLE]
S
Shuduo Sang 已提交
852

853 854
    def canAddData(self):
        return self._info[self.CAN_ADD_DATA]
S
Shuduo Sang 已提交
855

856 857 858 859 860
    def canReadData(self):
        return self._info[self.CAN_READ_DATA]

    def assertAtMostOneSuccess(self, tasks, cls):
        sCnt = 0
S
Shuduo Sang 已提交
861
        for task in tasks:
862 863 864
            if not isinstance(task, cls):
                continue
            if task.isSuccess():
S
Steven Li 已提交
865
                # task.logDebug("Task success found")
866
                sCnt += 1
S
Shuduo Sang 已提交
867 868 869
                if (sCnt >= 2):
                    raise RuntimeError(
                        "Unexpected more than 1 success with task: {}".format(cls))
870 871 872 873

    def assertIfExistThenSuccess(self, tasks, cls):
        sCnt = 0
        exists = False
S
Shuduo Sang 已提交
874
        for task in tasks:
875 876
            if not isinstance(task, cls):
                continue
S
Shuduo Sang 已提交
877
            exists = True  # we have a valid instance
878 879
            if task.isSuccess():
                sCnt += 1
S
Shuduo Sang 已提交
880 881 882
        if (exists and sCnt <= 0):
            raise RuntimeError(
                "Unexpected zero success for task: {}".format(cls))
883 884

    def assertNoTask(self, tasks, cls):
S
Shuduo Sang 已提交
885
        for task in tasks:
886
            if isinstance(task, cls):
S
Shuduo Sang 已提交
887 888
                raise CrashGenError(
                    "This task: {}, is not expected to be present, given the success/failure of others".format(cls.__name__))
889 890

    def assertNoSuccess(self, tasks, cls):
S
Shuduo Sang 已提交
891
        for task in tasks:
892 893
            if isinstance(task, cls):
                if task.isSuccess():
S
Shuduo Sang 已提交
894 895
                    raise RuntimeError(
                        "Unexpected successful task: {}".format(cls))
896 897

    def hasSuccess(self, tasks, cls):
S
Shuduo Sang 已提交
898
        for task in tasks:
899 900 901 902 903 904
            if not isinstance(task, cls):
                continue
            if task.isSuccess():
                return True
        return False

S
Steven Li 已提交
905
    def hasTask(self, tasks, cls):
S
Shuduo Sang 已提交
906
        for task in tasks:
S
Steven Li 已提交
907 908 909 910
            if isinstance(task, cls):
                return True
        return False

S
Shuduo Sang 已提交
911

912 913 914 915
class StateInvalid(AnyState):
    def getInfo(self):
        return [
            self.STATE_INVALID,
S
Shuduo Sang 已提交
916 917 918
            False, False,  # can create/drop Db
            False, False,  # can create/drop fixed table
            False, False,  # can insert/read data with fixed table
919 920 921 922
        ]

    # def verifyTasksToState(self, tasks, newState):

S
Shuduo Sang 已提交
923

924 925 926 927
class StateEmpty(AnyState):
    def getInfo(self):
        return [
            self.STATE_EMPTY,
S
Shuduo Sang 已提交
928 929 930
            True, False,  # can create/drop Db
            False, False,  # can create/drop fixed table
            False, False,  # can insert/read data with fixed table
931 932
        ]

S
Shuduo Sang 已提交
933 934
    def verifyTasksToState(self, tasks, newState):
        if (self.hasSuccess(tasks, TaskCreateDb)
935
                ):  # at EMPTY, if there's succes in creating DB
S
Shuduo Sang 已提交
936 937 938 939
            if (not self.hasTask(tasks, TaskDropDb)):  # and no drop_db tasks
                # we must have at most one. TODO: compare numbers
                self.assertAtMostOneSuccess(tasks, TaskCreateDb)

940 941 942 943 944 945 946 947 948 949 950

class StateDbOnly(AnyState):
    def getInfo(self):
        return [
            self.STATE_DB_ONLY,
            False, True,
            True, False,
            False, False,
        ]

    def verifyTasksToState(self, tasks, newState):
S
Shuduo Sang 已提交
951 952 953
        if (not self.hasTask(tasks, TaskCreateDb)):
            # only if we don't create any more
            self.assertAtMostOneSuccess(tasks, TaskDropDb)
954
        self.assertIfExistThenSuccess(tasks, TaskDropDb)
S
Steven Li 已提交
955
        # self.assertAtMostOneSuccess(tasks, CreateFixedTableTask) # not true in massively parrallel cases
956
        # Nothing to be said about adding data task
957
        # if ( self.hasSuccess(tasks, DropDbTask) ): # dropped the DB
S
Shuduo Sang 已提交
958 959 960
        # self.assertHasTask(tasks, DropDbTask) # implied by hasSuccess
        # self.assertAtMostOneSuccess(tasks, DropDbTask)
        # self._state = self.STATE_EMPTY
961 962
        # if ( self.hasSuccess(tasks, TaskCreateSuperTable) ): # did not drop db, create table success
        #     # self.assertHasTask(tasks, CreateFixedTableTask) # tried to create table
S
Shuduo Sang 已提交
963
        #     if ( not self.hasTask(tasks, TaskDropSuperTable) ):
964
        #         self.assertAtMostOneSuccess(tasks, TaskCreateSuperTable) # at most 1 attempt is successful, if we don't drop anything
S
Shuduo Sang 已提交
965 966 967 968 969 970
        # self.assertNoTask(tasks, DropDbTask) # should have have tried
        # if ( not self.hasSuccess(tasks, AddFixedDataTask) ): # just created table, no data yet
        #     # can't say there's add-data attempts, since they may all fail
        #     self._state = self.STATE_TABLE_ONLY
        # else:
        #     self._state = self.STATE_HAS_DATA
971 972 973 974
        # What about AddFixedData?
        # elif ( self.hasSuccess(tasks, AddFixedDataTask) ):
        #     self._state = self.STATE_HAS_DATA
        # else: # no success in dropping db tasks, no success in create fixed table? read data should also fail
S
Shuduo Sang 已提交
975
        #     # raise RuntimeError("Unexpected no-success scenario")   # We might just landed all failure tasks,
976 977
        #     self._state = self.STATE_DB_ONLY  # no change

S
Shuduo Sang 已提交
978

979
class StateSuperTableOnly(AnyState):
980 981 982 983 984 985 986 987 988
    def getInfo(self):
        return [
            self.STATE_TABLE_ONLY,
            False, True,
            False, True,
            True, True,
        ]

    def verifyTasksToState(self, tasks, newState):
S
Shuduo Sang 已提交
989
        if (self.hasSuccess(tasks, TaskDropSuperTable)
990
                ):  # we are able to drop the table
991
            #self.assertAtMostOneSuccess(tasks, TaskDropSuperTable)
S
Shuduo Sang 已提交
992 993
            # we must have had recreted it
            self.hasSuccess(tasks, TaskCreateSuperTable)
994

995
            # self._state = self.STATE_DB_ONLY
S
Steven Li 已提交
996 997
        # elif ( self.hasSuccess(tasks, AddFixedDataTask) ): # no success dropping the table, but added data
        #     self.assertNoTask(tasks, DropFixedTableTask) # not true in massively parrallel cases
998
            # self._state = self.STATE_HAS_DATA
S
Steven Li 已提交
999 1000 1001
        # elif ( self.hasSuccess(tasks, ReadFixedDataTask) ): # no success in prev cases, but was able to read data
            # self.assertNoTask(tasks, DropFixedTableTask)
            # self.assertNoTask(tasks, AddFixedDataTask)
1002
            # self._state = self.STATE_TABLE_ONLY # no change
S
Steven Li 已提交
1003 1004 1005
        # else: # did not drop table, did not insert data, did not read successfully, that is impossible
        #     raise RuntimeError("Unexpected no-success scenarios")
        # TODO: need to revamp!!
1006

S
Shuduo Sang 已提交
1007

1008 1009 1010 1011 1012 1013 1014 1015 1016 1017
class StateHasData(AnyState):
    def getInfo(self):
        return [
            self.STATE_HAS_DATA,
            False, True,
            False, True,
            True, True,
        ]

    def verifyTasksToState(self, tasks, newState):
S
Shuduo Sang 已提交
1018
        if (newState.equals(AnyState.STATE_EMPTY)):
1019
            self.hasSuccess(tasks, TaskDropDb)
S
Shuduo Sang 已提交
1020 1021 1022 1023
            if (not self.hasTask(tasks, TaskCreateDb)):
                self.assertAtMostOneSuccess(tasks, TaskDropDb)  # TODO: dicy
        elif (newState.equals(AnyState.STATE_DB_ONLY)):  # in DB only
            if (not self.hasTask(tasks, TaskCreateDb)
1024
                ):  # without a create_db task
S
Shuduo Sang 已提交
1025 1026
                # we must have drop_db task
                self.assertNoTask(tasks, TaskDropDb)
1027
            self.hasSuccess(tasks, TaskDropSuperTable)
1028
            # self.assertAtMostOneSuccess(tasks, DropFixedSuperTableTask) # TODO: dicy
1029 1030 1031 1032
        # elif ( newState.equals(AnyState.STATE_TABLE_ONLY) ): # data deleted
            # self.assertNoTask(tasks, TaskDropDb)
            # self.assertNoTask(tasks, TaskDropSuperTable)
            # self.assertNoTask(tasks, TaskAddData)
S
Steven Li 已提交
1033
            # self.hasSuccess(tasks, DeleteDataTasks)
S
Shuduo Sang 已提交
1034 1035
        else:  # should be STATE_HAS_DATA
            if (not self.hasTask(tasks, TaskCreateDb)
1036
                ):  # only if we didn't create one
S
Shuduo Sang 已提交
1037 1038 1039
                # we shouldn't have dropped it
                self.assertNoTask(tasks, TaskDropDb)
            if (not self.hasTask(tasks, TaskCreateSuperTable)
1040
                    ):  # if we didn't create the table
S
Shuduo Sang 已提交
1041 1042
                # we should not have a task that drops it
                self.assertNoTask(tasks, TaskDropSuperTable)
1043
            # self.assertIfExistThenSuccess(tasks, ReadFixedDataTask)
S
Steven Li 已提交
1044

S
Shuduo Sang 已提交
1045

1046
class StateMechine:
1047 1048
    def __init__(self, dbConn):
        self._dbConn = dbConn
S
Shuduo Sang 已提交
1049 1050 1051 1052 1053
        self._curState = self._findCurrentState()  # starting state
        # transitition target probabilities, indexed with value of STATE_EMPTY,
        # STATE_DB_ONLY, etc.
        self._stateWeights = [1, 3, 5, 15]

1054 1055 1056
    def getCurrentState(self):
        return self._curState

1057 1058 1059
    def hasDatabase(self):
        return self._curState.canDropDb()  # ha, can drop DB means it has one

1060
    # May be slow, use cautionsly...
S
Shuduo Sang 已提交
1061
    def getTaskTypes(self):  # those that can run (directly/indirectly) from the current state
1062 1063 1064 1065 1066 1067
        def typesToStrings(types):
            ss = []
            for t in types:
                ss.append(t.__name__)
            return ss

S
Shuduo Sang 已提交
1068
        allTaskClasses = StateTransitionTask.__subclasses__()  # all state transition tasks
1069 1070
        firstTaskTypes = []
        for tc in allTaskClasses:
S
Shuduo Sang 已提交
1071
            # t = tc(self) # create task object
1072 1073
            if tc.canBeginFrom(self._curState):
                firstTaskTypes.append(tc)
S
Shuduo Sang 已提交
1074 1075 1076 1077 1078 1079 1080 1081
        # now we have all the tasks that can begin directly from the current
        # state, let's figure out the INDIRECT ones
        taskTypes = firstTaskTypes.copy()  # have to have these
        for task1 in firstTaskTypes:  # each task type gathered so far
            endState = task1.getEndState()  # figure the end state
            if endState is None:  # does not change end state
                continue  # no use, do nothing
            for tc in allTaskClasses:  # what task can further begin from there?
1082
                if tc.canBeginFrom(endState) and (tc not in firstTaskTypes):
S
Shuduo Sang 已提交
1083
                    taskTypes.append(tc)  # gather it
1084 1085

        if len(taskTypes) <= 0:
S
Shuduo Sang 已提交
1086 1087 1088 1089 1090 1091 1092
            raise RuntimeError(
                "No suitable task types found for state: {}".format(
                    self._curState))
        logger.debug(
            "[OPS] Tasks found for state {}: {}".format(
                self._curState,
                typesToStrings(taskTypes)))
1093 1094 1095 1096
        return taskTypes

    def _findCurrentState(self):
        dbc = self._dbConn
S
Shuduo Sang 已提交
1097 1098
        ts = time.time()  # we use this to debug how fast/slow it is to do the various queries to find the current DB state
        if dbc.query("show databases") == 0:  # no database?!
1099
            # logger.debug("Found EMPTY state")
S
Shuduo Sang 已提交
1100 1101 1102
            logger.debug(
                "[STT] empty database found, between {} and {}".format(
                    ts, time.time()))
1103
            return StateEmpty()
S
Shuduo Sang 已提交
1104 1105 1106 1107
        # did not do this when openning connection, and this is NOT the worker
        # thread, which does this on their own
        dbc.execute("use db")
        if dbc.query("show tables") == 0:  # no tables
1108
            # logger.debug("Found DB ONLY state")
S
Shuduo Sang 已提交
1109 1110 1111
            logger.debug(
                "[STT] DB_ONLY found, between {} and {}".format(
                    ts, time.time()))
1112
            return StateDbOnly()
S
Shuduo Sang 已提交
1113 1114
        if dbc.query("SELECT * FROM db.{}".format(DbManager.getFixedSuperTableName())
                     ) == 0:  # no regular tables
1115
            # logger.debug("Found TABLE_ONLY state")
S
Shuduo Sang 已提交
1116 1117 1118
            logger.debug(
                "[STT] SUPER_TABLE_ONLY found, between {} and {}".format(
                    ts, time.time()))
1119
            return StateSuperTableOnly()
S
Shuduo Sang 已提交
1120
        else:  # has actual tables
1121
            # logger.debug("Found HAS_DATA state")
S
Shuduo Sang 已提交
1122 1123 1124
            logger.debug(
                "[STT] HAS_DATA found, between {} and {}".format(
                    ts, time.time()))
1125 1126 1127
            return StateHasData()

    def transition(self, tasks):
S
Shuduo Sang 已提交
1128
        if (len(tasks) == 0):  # before 1st step, or otherwise empty
1129
            logger.debug("[STT] Starting State: {}".format(self._curState))
S
Shuduo Sang 已提交
1130
            return  # do nothing
1131

S
Shuduo Sang 已提交
1132 1133
        # this should show up in the server log, separating steps
        self._dbConn.execute("show dnodes")
1134 1135 1136 1137

        # Generic Checks, first based on the start state
        if self._curState.canCreateDb():
            self._curState.assertIfExistThenSuccess(tasks, TaskCreateDb)
S
Shuduo Sang 已提交
1138 1139
            # self.assertAtMostOneSuccess(tasks, CreateDbTask) # not really, in
            # case of multiple creation and drops
1140 1141 1142

        if self._curState.canDropDb():
            self._curState.assertIfExistThenSuccess(tasks, TaskDropDb)
S
Shuduo Sang 已提交
1143 1144
            # self.assertAtMostOneSuccess(tasks, DropDbTask) # not really in
            # case of drop-create-drop
1145 1146 1147

        # if self._state.canCreateFixedTable():
            # self.assertIfExistThenSuccess(tasks, CreateFixedTableTask) # Not true, DB may be dropped
S
Shuduo Sang 已提交
1148 1149
            # self.assertAtMostOneSuccess(tasks, CreateFixedTableTask) # not
            # really, in case of create-drop-create
1150 1151 1152

        # if self._state.canDropFixedTable():
            # self.assertIfExistThenSuccess(tasks, DropFixedTableTask) # Not True, the whole DB may be dropped
S
Shuduo Sang 已提交
1153 1154
            # self.assertAtMostOneSuccess(tasks, DropFixedTableTask) # not
            # really in case of drop-create-drop
1155 1156

        # if self._state.canAddData():
S
Shuduo Sang 已提交
1157 1158
        # self.assertIfExistThenSuccess(tasks, AddFixedDataTask)  # not true
        # actually
1159 1160 1161 1162 1163 1164

        # if self._state.canReadData():
            # Nothing for sure

        newState = self._findCurrentState()
        logger.debug("[STT] New DB state determined: {}".format(newState))
S
Shuduo Sang 已提交
1165 1166
        # can old state move to new state through the tasks?
        self._curState.verifyTasksToState(tasks, newState)
1167 1168 1169
        self._curState = newState

    def pickTaskType(self):
S
Shuduo Sang 已提交
1170 1171
        # all the task types we can choose from at curent state
        taskTypes = self.getTaskTypes()
1172 1173 1174
        weights = []
        for tt in taskTypes:
            endState = tt.getEndState()
S
Shuduo Sang 已提交
1175 1176 1177
            if endState is not None:
                # TODO: change to a method
                weights.append(self._stateWeights[endState.getValIndex()])
1178
            else:
S
Shuduo Sang 已提交
1179 1180
                # read data task, default to 10: TODO: change to a constant
                weights.append(10)
1181
        i = self._weighted_choice_sub(weights)
S
Shuduo Sang 已提交
1182
        # logger.debug(" (weighted random:{}/{}) ".format(i, len(taskTypes)))
1183 1184
        return taskTypes[i]

S
Shuduo Sang 已提交
1185 1186 1187 1188 1189
    # ref:
    # https://eli.thegreenplace.net/2010/01/22/weighted-random-generation-in-python/
    def _weighted_choice_sub(self, weights):
        # TODO: use our dice to ensure it being determinstic?
        rnd = random.random() * sum(weights)
1190 1191 1192 1193
        for i, w in enumerate(weights):
            rnd -= w
            if rnd < 0:
                return i
1194

1195
# Manager of the Database Data/Connection
S
Shuduo Sang 已提交
1196 1197 1198 1199


class DbManager():
    def __init__(self, resetDb=True):
S
Steven Li 已提交
1200
        self.tableNumQueue = LinearQueue()
S
Shuduo Sang 已提交
1201 1202 1203
        # datetime.datetime(2019, 1, 1) # initial date time tick
        self._lastTick = self.setupLastTick()
        self._lastInt = 0  # next one is initial integer
1204
        self._lock = threading.RLock()
S
Shuduo Sang 已提交
1205

1206
        # self.openDbServerConnection()
S
Shuduo Sang 已提交
1207 1208
        self._dbConn = DbConn.createNative() if (
            gConfig.connector_type == 'native') else DbConn.createRest()
1209
        try:
S
Shuduo Sang 已提交
1210
            self._dbConn.open()  # may throw taos.error.ProgrammingError: disconnected
1211 1212
        except taos.error.ProgrammingError as err:
            # print("Error type: {}, msg: {}, value: {}".format(type(err), err.msg, err))
S
Shuduo Sang 已提交
1213 1214 1215
            if (err.msg == 'client disconnected'):  # cannot open DB connection
                print(
                    "Cannot establish DB connection, please re-run script without parameter, and follow the instructions.")
S
Steven Li 已提交
1216
                sys.exit(2)
1217
            else:
S
Shuduo Sang 已提交
1218 1219
                raise
        except BaseException:
S
Steven Li 已提交
1220
            print("[=] Unexpected exception")
S
Shuduo Sang 已提交
1221 1222 1223 1224
            raise

        if resetDb:
            self._dbConn.resetDb()  # drop and recreate DB
1225

S
Shuduo Sang 已提交
1226 1227
        # Do this after dbConn is in proper shape
        self._stateMachine = StateMechine(self._dbConn)
1228

1229 1230 1231
    def getDbConn(self):
        return self._dbConn

S
Shuduo Sang 已提交
1232
    def getStateMachine(self) -> StateMechine:
1233 1234 1235 1236
        return self._stateMachine

    # def getState(self):
    #     return self._stateMachine.getCurrentState()
1237 1238 1239 1240 1241 1242

    # We aim to create a starting time tick, such that, whenever we run our test here once
    # We should be able to safely create 100,000 records, which will not have any repeated time stamp
    # when we re-run the test in 3 minutes (180 seconds), basically we should expand time duration
    # by a factor of 500.
    # TODO: what if it goes beyond 10 years into the future
1243
    # TODO: fix the error as result of above: "tsdb timestamp is out of range"
1244
    def setupLastTick(self):
1245
        t1 = datetime.datetime(2020, 6, 1)
1246
        t2 = datetime.datetime.now()
S
Shuduo Sang 已提交
1247 1248 1249 1250
        # maybe a very large number, takes 69 years to exceed Python int range
        elSec = int(t2.timestamp() - t1.timestamp())
        elSec2 = (elSec % (8 * 12 * 30 * 24 * 60 * 60 / 500)) * \
            500  # a number representing seconds within 10 years
1251
        # print("elSec = {}".format(elSec))
S
Shuduo Sang 已提交
1252 1253 1254
        t3 = datetime.datetime(2012, 1, 1)  # default "keep" is 10 years
        t4 = datetime.datetime.fromtimestamp(
            t3.timestamp() + elSec2)  # see explanation above
1255 1256 1257
        logger.info("Setting up TICKS to start from: {}".format(t4))
        return t4

S
Shuduo Sang 已提交
1258
    def pickAndAllocateTable(self):  # pick any table, and "use" it
S
Steven Li 已提交
1259 1260
        return self.tableNumQueue.pickAndAllocate()

1261 1262 1263 1264 1265
    def addTable(self):
        with self._lock:
            tIndex = self.tableNumQueue.push()
        return tIndex

1266 1267
    @classmethod
    def getFixedSuperTableName(cls):
1268
        return "fs_table"
1269

S
Shuduo Sang 已提交
1270
    def releaseTable(self, i):  # return the table back, so others can use it
S
Steven Li 已提交
1271 1272
        self.tableNumQueue.release(i)

1273
    def getNextTick(self):
S
Shuduo Sang 已提交
1274 1275
        with self._lock:  # prevent duplicate tick
            if Dice.throw(10) == 0:  # 1 in 10 chance
S
Steven Li 已提交
1276
                return self._lastTick + datetime.timedelta(0, -100)
S
Shuduo Sang 已提交
1277 1278 1279
            else:  # regular
                # add one second to it
                self._lastTick += datetime.timedelta(0, 1)
S
Steven Li 已提交
1280
                return self._lastTick
1281 1282

    def getNextInt(self):
1283 1284 1285
        with self._lock:
            self._lastInt += 1
            return self._lastInt
1286 1287

    def getNextBinary(self):
S
Shuduo Sang 已提交
1288 1289
        return "Beijing_Shanghai_Los_Angeles_New_York_San_Francisco_Chicago_Beijing_Shanghai_Los_Angeles_New_York_San_Francisco_Chicago_{}".format(
            self.getNextInt())
1290 1291 1292

    def getNextFloat(self):
        return 0.9 + self.getNextInt()
S
Shuduo Sang 已提交
1293

S
Steven Li 已提交
1294
    def getTableNameToDelete(self):
S
Shuduo Sang 已提交
1295 1296
        tblNum = self.tableNumQueue.pop()  # TODO: race condition!
        if (not tblNum):  # maybe false
1297
            return False
S
Shuduo Sang 已提交
1298

S
Steven Li 已提交
1299 1300
        return "table_{}".format(tblNum)

1301
    def cleanUp(self):
S
Shuduo Sang 已提交
1302 1303
        self._dbConn.close()

1304

1305
class TaskExecutor():
1306
    class BoundedList:
S
Shuduo Sang 已提交
1307
        def __init__(self, size=10):
1308 1309 1310
            self._size = size
            self._list = []

S
Shuduo Sang 已提交
1311 1312
        def add(self, n: int):
            if not self._list:  # empty
1313 1314 1315 1316 1317 1318 1319
                self._list.append(n)
                return
            # now we should insert
            nItems = len(self._list)
            insPos = 0
            for i in range(nItems):
                insPos = i
S
Shuduo Sang 已提交
1320 1321 1322
                if n <= self._list[i]:  # smaller than this item, time to insert
                    break  # found the insertion point
                insPos += 1  # insert to the right
1323

S
Shuduo Sang 已提交
1324 1325
            if insPos == 0:  # except for the 1st item, # TODO: elimiate first item as gating item
                return  # do nothing
1326 1327

            # print("Inserting at postion {}, value: {}".format(insPos, n))
S
Shuduo Sang 已提交
1328 1329
            self._list.insert(insPos, n)  # insert

1330
            newLen = len(self._list)
S
Shuduo Sang 已提交
1331 1332 1333 1334 1335
            if newLen <= self._size:
                return  # do nothing
            elif newLen == (self._size + 1):
                del self._list[0]  # remove the first item
            else:
1336 1337 1338 1339 1340 1341 1342
                raise RuntimeError("Corrupt Bounded List")

        def __str__(self):
            return repr(self._list)

    _boundedList = BoundedList()

1343 1344 1345
    def __init__(self, curStep):
        self._curStep = curStep

1346 1347 1348 1349
    @classmethod
    def getBoundedList(cls):
        return cls._boundedList

1350 1351 1352
    def getCurStep(self):
        return self._curStep

S
Shuduo Sang 已提交
1353
    def execute(self, task: Task, wt: WorkerThread):  # execute a task on a thread
1354
        task.execute(wt)
1355

1356 1357 1358 1359
    def recordDataMark(self, n: int):
        # print("[{}]".format(n), end="", flush=True)
        self._boundedList.add(n)

1360 1361
    # def logInfo(self, msg):
    #     logger.info("    T[{}.x]: ".format(self._curStep) + msg)
1362

1363 1364
    # def logDebug(self, msg):
    #     logger.debug("    T[{}.x]: ".format(self._curStep) + msg)
1365

S
Shuduo Sang 已提交
1366

S
Steven Li 已提交
1367
class Task():
1368 1369 1370 1371
    taskSn = 100

    @classmethod
    def allocTaskNum(cls):
S
Shuduo Sang 已提交
1372
        Task.taskSn += 1  # IMPORTANT: cannot use cls.taskSn, since each sub class will have a copy
S
Steven Li 已提交
1373 1374
        # logger.debug("Allocating taskSN: {}".format(Task.taskSn))
        return Task.taskSn
1375

S
Shuduo Sang 已提交
1376
    def __init__(self, dbManager: DbManager, execStats: ExecutionStats):
1377
        self._dbManager = dbManager
S
Shuduo Sang 已提交
1378
        self._workerThread = None
1379
        self._err = None
1380
        self._aborted = False
1381
        self._curStep = None
S
Shuduo Sang 已提交
1382
        self._numRows = None  # Number of rows affected
1383

S
Shuduo Sang 已提交
1384
        # Assign an incremental task serial number
1385
        self._taskNum = self.allocTaskNum()
S
Steven Li 已提交
1386
        # logger.debug("Creating new task {}...".format(self._taskNum))
1387

1388
        self._execStats = execStats
S
Shuduo Sang 已提交
1389
        self._lastSql = ""  # last SQL executed/attempted
1390

1391
    def isSuccess(self):
S
Shuduo Sang 已提交
1392
        return self._err is None
1393

1394 1395 1396
    def isAborted(self):
        return self._aborted

S
Shuduo Sang 已提交
1397
    def clone(self):  # TODO: why do we need this again?
1398
        newTask = self.__class__(self._dbManager, self._execStats)
1399 1400 1401
        return newTask

    def logDebug(self, msg):
S
Shuduo Sang 已提交
1402 1403 1404
        self._workerThread.logDebug(
            "Step[{}.{}] {}".format(
                self._curStep, self._taskNum, msg))
1405 1406

    def logInfo(self, msg):
S
Shuduo Sang 已提交
1407 1408 1409
        self._workerThread.logInfo(
            "Step[{}.{}] {}".format(
                self._curStep, self._taskNum, msg))
1410

1411
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1412 1413 1414
        raise RuntimeError(
            "To be implemeted by child classes, class name: {}".format(
                self.__class__.__name__))
1415

1416 1417
    def execute(self, wt: WorkerThread):
        wt.verifyThreadSelf()
S
Shuduo Sang 已提交
1418
        self._workerThread = wt  # type: ignore
1419 1420

        te = wt.getTaskExecutor()
1421
        self._curStep = te.getCurStep()
S
Shuduo Sang 已提交
1422 1423
        self.logDebug(
            "[-] executing task {}...".format(self.__class__.__name__))
1424 1425

        self._err = None
S
Shuduo Sang 已提交
1426 1427
        self._execStats.beginTaskType(
            self.__class__.__name__)  # mark beginning
1428
        try:
S
Shuduo Sang 已提交
1429
            self._executeInternal(te, wt)  # TODO: no return value?
1430
        except taos.error.ProgrammingError as err:
S
Shuduo Sang 已提交
1431 1432
            errno2 = err.errno if (
                err.errno > 0) else 0x80000000 + err.errno  # correct error scheme
1433 1434 1435 1436
            if (gConfig.continue_on_exception):  # user choose to continue
                self.logDebug(
                    "[=] Continue after TAOS exception: errno=0x{:X}, msg: {}, SQL: {}".format(
                        errno2, err, self._lastSql))
1437
                self._err = err
1438
            elif (errno2 in [
S
Shuduo Sang 已提交
1439
                0x05,  # TSDB_CODE_RPC_NOT_READY
1440 1441 1442 1443
                0x200, 0x360, 0x362, 0x36A, 0x36B, 0x36D,
                0x381, 0x380, 0x383,
                0x386,  # DB is being dropped?!
                0x503,
S
Shuduo Sang 已提交
1444
                0x510,  # vnode not in ready state
1445
                0x600,
S
Shuduo Sang 已提交
1446 1447 1448 1449 1450
                1000  # REST catch-all error
            ]):  # allowed errors
                self.logDebug(
                    "[=] Acceptable Taos library exception: errno=0x{:X}, msg: {}, SQL: {}".format(
                        errno2, err, self._lastSql))
1451
                print("_", end="", flush=True)
S
Shuduo Sang 已提交
1452
                self._err = err
1453
            else:
S
Shuduo Sang 已提交
1454 1455
                errMsg = "[=] Unexpected Taos library exception: errno=0x{:X}, msg: {}, SQL: {}".format(
                    errno2, err, self._lastSql)
1456
                self.logDebug(errMsg)
S
Shuduo Sang 已提交
1457
                if gConfig.debug:
1458 1459
                    # raise # so that we see full stack
                    traceback.print_exc()
1460 1461
                print(
                    "\n\n----------------------------\nProgram ABORTED Due to Unexpected TAOS Error: \n\n{}\n".format(errMsg) +
1462 1463 1464 1465
                    "----------------------------\n")
                # sys.exit(-1)
                self._err = err
                self._aborted = True
S
Shuduo Sang 已提交
1466
        except Exception as e:
S
Steven Li 已提交
1467
            self.logInfo("Non-TAOS exception encountered")
S
Shuduo Sang 已提交
1468
            self._err = e
S
Steven Li 已提交
1469 1470
            self._aborted = True
            traceback.print_exc()
1471
        except BaseException as e:
1472
            self.logInfo("Python base exception encountered")
1473
            self._err = e
1474
            self._aborted = True
S
Steven Li 已提交
1475
            traceback.print_exc()
S
Shuduo Sang 已提交
1476 1477 1478 1479
        except BaseException:
            self.logDebug(
                "[=] Unexpected exception, SQL: {}".format(
                    self._lastSql))
1480
            raise
1481
        self._execStats.endTaskType(self.__class__.__name__, self.isSuccess())
S
Shuduo Sang 已提交
1482 1483 1484 1485 1486

        self.logDebug("[X] task execution completed, {}, status: {}".format(
            self.__class__.__name__, "Success" if self.isSuccess() else "Failure"))
        # TODO: merge with above.
        self._execStats.incExecCount(self.__class__.__name__, self.isSuccess())
S
Steven Li 已提交
1487

1488
    def execSql(self, sql):
1489
        self._lastSql = sql
1490
        return self._dbManager.execute(sql)
1491

S
Shuduo Sang 已提交
1492
    def execWtSql(self, wt: WorkerThread, sql):  # execute an SQL on the worker thread
1493 1494 1495
        self._lastSql = sql
        return wt.execSql(sql)

S
Shuduo Sang 已提交
1496
    def queryWtSql(self, wt: WorkerThread, sql):  # execute an SQL on the worker thread
1497 1498 1499
        self._lastSql = sql
        return wt.querySql(sql)

S
Shuduo Sang 已提交
1500
    def getQueryResult(self, wt: WorkerThread):  # execute an SQL on the worker thread
1501 1502 1503
        return wt.getQueryResult()


1504
class ExecutionStats:
1505
    def __init__(self):
S
Shuduo Sang 已提交
1506 1507
        # total/success times for a task
        self._execTimes: Dict[str, [int, int]] = {}
1508 1509 1510
        self._tasksInProgress = 0
        self._lock = threading.Lock()
        self._firstTaskStartTime = None
1511
        self._execStartTime = None
S
Shuduo Sang 已提交
1512 1513
        self._elapsedTime = 0.0  # total elapsed time
        self._accRunTime = 0.0  # accumulated run time
1514

1515 1516 1517
        self._failed = False
        self._failureReason = None

S
Steven Li 已提交
1518
    def __str__(self):
S
Shuduo Sang 已提交
1519 1520
        return "[ExecStats: _failed={}, _failureReason={}".format(
            self._failed, self._failureReason)
S
Steven Li 已提交
1521 1522

    def isFailed(self):
S
Shuduo Sang 已提交
1523
        return self._failed
S
Steven Li 已提交
1524

1525 1526 1527 1528 1529 1530
    def startExec(self):
        self._execStartTime = time.time()

    def endExec(self):
        self._elapsedTime = time.time() - self._execStartTime

S
Shuduo Sang 已提交
1531
    def incExecCount(self, klassName, isSuccess):  # TODO: add a lock here
1532 1533
        if klassName not in self._execTimes:
            self._execTimes[klassName] = [0, 0]
S
Shuduo Sang 已提交
1534 1535
        t = self._execTimes[klassName]  # tuple for the data
        t[0] += 1  # index 0 has the "total" execution times
1536
        if isSuccess:
S
Shuduo Sang 已提交
1537
            t[1] += 1  # index 1 has the "success" execution times
1538 1539 1540

    def beginTaskType(self, klassName):
        with self._lock:
S
Shuduo Sang 已提交
1541 1542
            if self._tasksInProgress == 0:  # starting a new round
                self._firstTaskStartTime = time.time()  # I am now the first task
1543 1544 1545 1546 1547
            self._tasksInProgress += 1

    def endTaskType(self, klassName, isSuccess):
        with self._lock:
            self._tasksInProgress -= 1
S
Shuduo Sang 已提交
1548
            if self._tasksInProgress == 0:  # all tasks have stopped
1549 1550 1551
                self._accRunTime += (time.time() - self._firstTaskStartTime)
                self._firstTaskStartTime = None

1552 1553 1554 1555
    def registerFailure(self, reason):
        self._failed = True
        self._failureReason = reason

1556
    def printStats(self):
S
Shuduo Sang 已提交
1557 1558 1559 1560 1561 1562
        logger.info(
            "----------------------------------------------------------------------")
        logger.info(
            "| Crash_Gen test {}, with the following stats:". format(
                "FAILED (reason: {})".format(
                    self._failureReason) if self._failed else "SUCCEEDED"))
1563
        logger.info("| Task Execution Times (success/total):")
1564
        execTimesAny = 0
S
Shuduo Sang 已提交
1565
        for k, n in self._execTimes.items():
1566
            execTimesAny += n[0]
S
Shuduo Sang 已提交
1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586
            logger.info("|    {0:<24}: {1}/{2}".format(k, n[1], n[0]))

        logger.info(
            "| Total Tasks Executed (success or not): {} ".format(execTimesAny))
        logger.info(
            "| Total Tasks In Progress at End: {}".format(
                self._tasksInProgress))
        logger.info(
            "| Total Task Busy Time (elapsed time when any task is in progress): {:.3f} seconds".format(
                self._accRunTime))
        logger.info(
            "| Average Per-Task Execution Time: {:.3f} seconds".format(self._accRunTime / execTimesAny))
        logger.info(
            "| Total Elapsed Time (from wall clock): {:.3f} seconds".format(
                self._elapsedTime))
        logger.info(
            "| Top numbers written: {}".format(
                TaskExecutor.getBoundedList()))
        logger.info(
            "----------------------------------------------------------------------")
1587 1588 1589


class StateTransitionTask(Task):
1590 1591 1592 1593 1594
    LARGE_NUMBER_OF_TABLES = 35
    SMALL_NUMBER_OF_TABLES = 3
    LARGE_NUMBER_OF_RECORDS = 50
    SMALL_NUMBER_OF_RECORDS = 3

1595
    @classmethod
S
Shuduo Sang 已提交
1596
    def getInfo(cls):  # each sub class should supply their own information
1597 1598
        raise RuntimeError("Overriding method expected")

S
Shuduo Sang 已提交
1599
    _endState = None
1600
    @classmethod
S
Shuduo Sang 已提交
1601
    def getEndState(cls):  # TODO: optimize by calling it fewer times
1602 1603
        raise RuntimeError("Overriding method expected")

1604 1605 1606
    # @classmethod
    # def getBeginStates(cls):
    #     return cls.getInfo()[0]
1607

1608 1609 1610
    # @classmethod
    # def getEndState(cls): # returning the class name
    #     return cls.getInfo()[0]
1611 1612

    @classmethod
1613 1614 1615
    def canBeginFrom(cls, state: AnyState):
        # return state.getValue() in cls.getBeginStates()
        raise RuntimeError("must be overriden")
1616

1617 1618 1619 1620
    @classmethod
    def getRegTableName(cls, i):
        return "db.reg_table_{}".format(i)

1621 1622
    def execute(self, wt: WorkerThread):
        super().execute(wt)
S
Shuduo Sang 已提交
1623 1624


1625
class TaskCreateDb(StateTransitionTask):
1626
    @classmethod
1627
    def getEndState(cls):
S
Shuduo Sang 已提交
1628
        return StateDbOnly()
1629

1630 1631 1632 1633
    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canCreateDb()

1634
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1635 1636
        self.execWtSql(wt, "create database db")

1637

1638
class TaskDropDb(StateTransitionTask):
1639
    @classmethod
1640 1641
    def getEndState(cls):
        return StateEmpty()
1642

1643 1644 1645 1646
    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canDropDb()

1647
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1648
        self.execWtSql(wt, "drop database db")
S
Steven Li 已提交
1649
        logger.debug("[OPS] database dropped at {}".format(time.time()))
1650

S
Shuduo Sang 已提交
1651

1652
class TaskCreateSuperTable(StateTransitionTask):
1653
    @classmethod
1654 1655
    def getEndState(cls):
        return StateSuperTableOnly()
1656

1657 1658
    @classmethod
    def canBeginFrom(cls, state: AnyState):
1659
        return state.canCreateFixedSuperTable()
1660

1661
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1662
        if not wt.dbInUse():  # no DB yet, to the best of our knowledge
1663 1664 1665
            logger.debug("Skipping task, no DB yet")
            return

S
Shuduo Sang 已提交
1666
        tblName = self._dbManager.getFixedSuperTableName()
1667
        # wt.execSql("use db")    # should always be in place
S
Shuduo Sang 已提交
1668 1669 1670 1671 1672
        self.execWtSql(
            wt,
            "create table db.{} (ts timestamp, speed int) tags (b binary(200), f float) ".format(tblName))
        # No need to create the regular tables, INSERT will do that
        # automatically
1673

S
Steven Li 已提交
1674

1675
class TaskReadData(StateTransitionTask):
1676
    @classmethod
1677
    def getEndState(cls):
S
Shuduo Sang 已提交
1678
        return None  # meaning doesn't affect state
1679

1680 1681 1682 1683
    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canReadData()

1684
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1685 1686 1687
        sTbName = self._dbManager.getFixedSuperTableName()
        self.queryWtSql(wt, "select TBNAME from db.{}".format(
            sTbName))  # TODO: analyze result set later
1688

S
Shuduo Sang 已提交
1689 1690
        if random.randrange(
                5) == 0:  # 1 in 5 chance, simulate a broken connection. TODO: break connection in all situations
1691 1692
            wt.getDbConn().close()
            wt.getDbConn().open()
1693
        else:
S
Shuduo Sang 已提交
1694 1695
            # wt.getDbConn().getQueryResult()
            rTables = self.getQueryResult(wt)
1696
            # print("rTables[0] = {}, type = {}".format(rTables[0], type(rTables[0])))
S
Shuduo Sang 已提交
1697
            for rTbName in rTables:  # regular tables
1698
                self.execWtSql(wt, "select * from db.{}".format(rTbName[0]))
1699

1700 1701
        # tdSql.query(" cars where tbname in ('carzero', 'carone')")

S
Shuduo Sang 已提交
1702

1703
class TaskDropSuperTable(StateTransitionTask):
1704
    @classmethod
1705
    def getEndState(cls):
S
Shuduo Sang 已提交
1706
        return StateDbOnly()
1707

1708 1709
    @classmethod
    def canBeginFrom(cls, state: AnyState):
1710
        return state.canDropFixedSuperTable()
1711

1712
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1713 1714 1715 1716 1717 1718 1719
        # 1/2 chance, we'll drop the regular tables one by one, in a randomized
        # sequence
        if Dice.throw(2) == 0:
            tblSeq = list(range(
                2 + (self.LARGE_NUMBER_OF_TABLES if gConfig.larger_data else self.SMALL_NUMBER_OF_TABLES)))
            random.shuffle(tblSeq)
            tickOutput = False  # if we have spitted out a "d" character for "drop regular table"
1720
            isSuccess = True
S
Shuduo Sang 已提交
1721 1722 1723
            for i in tblSeq:
                regTableName = self.getRegTableName(
                    i)  # "db.reg_table_{}".format(i)
1724
                try:
S
Shuduo Sang 已提交
1725 1726 1727 1728 1729 1730 1731
                    self.execWtSql(wt, "drop table {}".format(
                        regTableName))  # nRows always 0, like MySQL
                except taos.error.ProgrammingError as err:
                    # correcting for strange error number scheme
                    errno2 = err.errno if (
                        err.errno > 0) else 0x80000000 + err.errno
                    if (errno2 in [0x362]):  # mnode invalid table name
1732
                        isSuccess = False
S
Shuduo Sang 已提交
1733 1734 1735
                        logger.debug(
                            "[DB] Acceptable error when dropping a table")
                    continue  # try to delete next regular table
1736 1737

                if (not tickOutput):
S
Shuduo Sang 已提交
1738 1739
                    tickOutput = True  # Print only one time
                    if isSuccess:
1740 1741
                        print("d", end="", flush=True)
                    else:
S
Shuduo Sang 已提交
1742
                        print("f", end="", flush=True)
1743 1744

        # Drop the super table itself
S
Shuduo Sang 已提交
1745
        tblName = self._dbManager.getFixedSuperTableName()
1746
        self.execWtSql(wt, "drop table db.{}".format(tblName))
1747

S
Shuduo Sang 已提交
1748

1749 1750 1751
class TaskAlterTags(StateTransitionTask):
    @classmethod
    def getEndState(cls):
S
Shuduo Sang 已提交
1752
        return None  # meaning doesn't affect state
1753 1754 1755

    @classmethod
    def canBeginFrom(cls, state: AnyState):
S
Shuduo Sang 已提交
1756
        return state.canDropFixedSuperTable()  # if we can drop it, we can alter tags
1757 1758

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1759
        tblName = self._dbManager.getFixedSuperTableName()
1760
        dice = Dice.throw(4)
S
Shuduo Sang 已提交
1761
        if dice == 0:
1762
            sql = "alter table db.{} add tag extraTag int".format(tblName)
S
Shuduo Sang 已提交
1763
        elif dice == 1:
1764
            sql = "alter table db.{} drop tag extraTag".format(tblName)
S
Shuduo Sang 已提交
1765
        elif dice == 2:
1766
            sql = "alter table db.{} drop tag newTag".format(tblName)
S
Shuduo Sang 已提交
1767 1768 1769
        else:  # dice == 3
            sql = "alter table db.{} change tag extraTag newTag".format(
                tblName)
1770 1771

        self.execWtSql(wt, sql)
1772

S
Shuduo Sang 已提交
1773

1774
class TaskAddData(StateTransitionTask):
S
Shuduo Sang 已提交
1775 1776
    # Track which table is being actively worked on
    activeTable: Set[int] = set()
1777

S
Shuduo Sang 已提交
1778 1779
    # We use these two files to record operations to DB, useful for power-off
    # tests
1780 1781 1782 1783 1784
    fAddLogReady = None
    fAddLogDone = None

    @classmethod
    def prepToRecordOps(cls):
S
Shuduo Sang 已提交
1785 1786 1787 1788
        if gConfig.record_ops:
            if (cls.fAddLogReady is None):
                logger.info(
                    "Recording in a file operations to be performed...")
1789
                cls.fAddLogReady = open("add_log_ready.txt", "w")
S
Shuduo Sang 已提交
1790
            if (cls.fAddLogDone is None):
1791 1792
                logger.info("Recording in a file operations completed...")
                cls.fAddLogDone = open("add_log_done.txt", "w")
1793

1794
    @classmethod
1795 1796
    def getEndState(cls):
        return StateHasData()
1797 1798 1799 1800

    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canAddData()
S
Shuduo Sang 已提交
1801

1802
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1803
        ds = self._dbManager
S
Shuduo Sang 已提交
1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814
        # wt.execSql("use db") # TODO: seems to be an INSERT bug to require
        # this
        tblSeq = list(
            range(
                self.LARGE_NUMBER_OF_TABLES if gConfig.larger_data else self.SMALL_NUMBER_OF_TABLES))
        random.shuffle(tblSeq)
        for i in tblSeq:
            if (i in self.activeTable):  # wow already active
                # logger.info("Concurrent data insertion into table: {}".format(i))
                # print("ct({})".format(i), end="", flush=True) # Concurrent
                # insertion into table
1815 1816
                print("x", end="", flush=True)
            else:
S
Shuduo Sang 已提交
1817 1818 1819 1820 1821 1822 1823 1824
                self.activeTable.add(i)  # marking it active
            # No need to shuffle data sequence, unless later we decide to do
            # non-increment insertion
            regTableName = self.getRegTableName(
                i)  # "db.reg_table_{}".format(i)
            for j in range(
                    self.LARGE_NUMBER_OF_RECORDS if gConfig.larger_data else self.SMALL_NUMBER_OF_RECORDS):  # number of records per table
                nextInt = ds.getNextInt()
1825 1826
                if gConfig.record_ops:
                    self.prepToRecordOps()
S
Shuduo Sang 已提交
1827 1828 1829
                    self.fAddLogReady.write(
                        "Ready to write {} to {}\n".format(
                            nextInt, regTableName))
1830 1831 1832
                    self.fAddLogReady.flush()
                    os.fsync(self.fAddLogReady)
                sql = "insert into {} using {} tags ('{}', {}) values ('{}', {});".format(
S
Shuduo Sang 已提交
1833 1834
                    regTableName,
                    ds.getFixedSuperTableName(),
1835
                    ds.getNextBinary(), ds.getNextFloat(),
1836
                    ds.getNextTick(), nextInt)
S
Shuduo Sang 已提交
1837 1838 1839
                self.execWtSql(wt, sql)
                # Successfully wrote the data into the DB, let's record it
                # somehow
1840
                te.recordDataMark(nextInt)
1841
                if gConfig.record_ops:
S
Shuduo Sang 已提交
1842 1843 1844
                    self.fAddLogDone.write(
                        "Wrote {} to {}\n".format(
                            nextInt, regTableName))
1845 1846
                    self.fAddLogDone.flush()
                    os.fsync(self.fAddLogDone)
S
Shuduo Sang 已提交
1847
            self.activeTable.discard(i)  # not raising an error, unlike remove
1848 1849


S
Steven Li 已提交
1850 1851
# Deterministic random number generator
class Dice():
S
Shuduo Sang 已提交
1852
    seeded = False  # static, uninitialized
S
Steven Li 已提交
1853 1854

    @classmethod
S
Shuduo Sang 已提交
1855
    def seed(cls, s):  # static
S
Steven Li 已提交
1856
        if (cls.seeded):
S
Shuduo Sang 已提交
1857 1858
            raise RuntimeError(
                "Cannot seed the random generator more than once")
S
Steven Li 已提交
1859 1860 1861 1862 1863
        cls.verifyRNG()
        random.seed(s)
        cls.seeded = True  # TODO: protect against multi-threading

    @classmethod
S
Shuduo Sang 已提交
1864
    def verifyRNG(cls):  # Verify that the RNG is determinstic
S
Steven Li 已提交
1865 1866 1867 1868
        random.seed(0)
        x1 = random.randrange(0, 1000)
        x2 = random.randrange(0, 1000)
        x3 = random.randrange(0, 1000)
S
Shuduo Sang 已提交
1869
        if (x1 != 864 or x2 != 394 or x3 != 776):
S
Steven Li 已提交
1870 1871 1872
            raise RuntimeError("System RNG is not deterministic")

    @classmethod
S
Shuduo Sang 已提交
1873
    def throw(cls, stop):  # get 0 to stop-1
1874
        return cls.throwRange(0, stop)
S
Steven Li 已提交
1875 1876

    @classmethod
S
Shuduo Sang 已提交
1877 1878
    def throwRange(cls, start, stop):  # up to stop-1
        if (not cls.seeded):
S
Steven Li 已提交
1879
            raise RuntimeError("Cannot throw dice before seeding it")
1880
        return random.randrange(start, stop)
S
Steven Li 已提交
1881 1882


S
Steven Li 已提交
1883 1884
class LoggingFilter(logging.Filter):
    def filter(self, record: logging.LogRecord):
S
Shuduo Sang 已提交
1885 1886
        if (record.levelno >= logging.INFO):
            return True  # info or above always log
S
Steven Li 已提交
1887

S
Steven Li 已提交
1888 1889 1890 1891
        # Commenting out below to adjust...

        # if msg.startswith("[TRD]"):
        #     return False
S
Steven Li 已提交
1892 1893
        return True

S
Shuduo Sang 已提交
1894 1895

class MyLoggingAdapter(logging.LoggerAdapter):
1896 1897 1898 1899
    def process(self, msg, kwargs):
        return "[{}]{}".format(threading.get_ident() % 10000, msg), kwargs
        # return '[%s] %s' % (self.extra['connid'], msg), kwargs

S
Shuduo Sang 已提交
1900 1901

class SvcManager:
1902
    def __init__(self):
1903
        print("Starting TDengine Service Manager")
1904 1905
        signal.signal(signal.SIGTERM, self.sigIntHandler)
        signal.signal(signal.SIGINT, self.sigIntHandler)
1906
        signal.signal(signal.SIGUSR1, self.sigUsrHandler)  # different handler!
1907

1908
        self.inSigHandler = False
1909 1910
        # self._status = MainExec.STATUS_RUNNING # set inside
        # _startTaosService()
1911
        self.svcMgrThread = None
1912

1913 1914 1915 1916 1917 1918 1919 1920
    def _doMenu(self):
        choice = ""
        while True:
            print("\nInterrupting Service Program, Choose an Action: ")
            print("1: Resume")
            print("2: Terminate")
            print("3: Restart")
            # Remember to update the if range below
S
Shuduo Sang 已提交
1921
            # print("Enter Choice: ", end="", flush=True)
1922 1923 1924
            while choice == "":
                choice = input("Enter Choice: ")
                if choice != "":
S
Shuduo Sang 已提交
1925 1926 1927
                    break  # done with reading repeated input
            if choice in ["1", "2", "3"]:
                break  # we are done with whole method
1928
            print("Invalid choice, please try again.")
S
Shuduo Sang 已提交
1929
            choice = ""  # reset
1930 1931
        return choice

S
Shuduo Sang 已提交
1932
    def sigUsrHandler(self, signalNumber, frame):
1933
        print("Interrupting main thread execution upon SIGUSR1")
1934
        if self.inSigHandler:  # already
1935
            print("Ignoring repeated SIG...")
S
Shuduo Sang 已提交
1936
            return  # do nothing if it's already not running
1937
        self.inSigHandler = True
1938 1939

        choice = self._doMenu()
S
Shuduo Sang 已提交
1940 1941 1942 1943 1944
        if choice == "1":
            # TODO: can the sub-process be blocked due to us not reading from
            # queue?
            self.sigHandlerResume()
        elif choice == "2":
1945
            self.stopTaosService()
S
Shuduo Sang 已提交
1946
        elif choice == "3":
1947 1948
            self.stopTaosService()
            self.startTaosService()
1949 1950
        else:
            raise RuntimeError("Invalid menu choice: {}".format(choice))
1951

1952 1953
        self.inSigHandler = False

1954
    def sigIntHandler(self, signalNumber, frame):
1955
        print("Sig INT Handler starting...")
1956
        if self.inSigHandler:
1957 1958
            print("Ignoring repeated SIG_INT...")
            return
1959
        self.inSigHandler = True
1960

S
Shuduo Sang 已提交
1961 1962
        self.stopTaosService()
        print("INT signal handler returning...")
1963
        self.inSigHandler = False
1964

S
Shuduo Sang 已提交
1965
    def sigHandlerResume(self):
1966
        print("Resuming TDengine service manager thread (main thread)...\n\n")
1967

1968
    def _checkServiceManagerThread(self):
1969 1970 1971 1972
        if self.svcMgrThread:  # valid svc mgr thread
            if self.svcMgrThread.isStopped():  # done?
                self.svcMgrThread.procIpcBatch()  # one last time. TODO: appropriate?
                self.svcMgrThread = None  # no more
1973 1974

    def _procIpcAll(self):
1975 1976 1977 1978 1979 1980 1981 1982
        while self.svcMgrThread:  # for as long as the svc mgr thread is still here
            self.svcMgrThread.procIpcBatch()  # regular processing,
            time.sleep(0.5)  # pause, before next round
            self._checkServiceManagerThread()
        print(
            "Service Manager Thread (with subprocess) has ended, main thread now exiting...")

    def startTaosService(self):
1983
        if self.svcMgrThread:
1984 1985 1986
            raise RuntimeError(
                "Cannot start TAOS service when one may already be running")
        self.svcMgrThread = ServiceManagerThread()  # create the object
1987 1988
        self.svcMgrThread.start()
        print("TAOS service started, printing out output...")
1989 1990 1991
        self.svcMgrThread.procIpcBatch(
            trimToTarget=10,
            forceOutput=True)  # for printing 10 lines
1992
        print("TAOS service started")
1993 1994

    def stopTaosService(self, outputLines=20):
1995 1996 1997 1998 1999
        print("Terminating Service Manager Thread (SMT) execution...")
        if not self.svcMgrThread:
            raise RuntimeError("Unexpected empty svc mgr thread")
        self.svcMgrThread.stop()
        if self.svcMgrThread.isStopped():
2000 2001
            self.svcMgrThread.procIpcBatch(outputLines)  # one last time
            self.svcMgrThread = None
2002 2003 2004 2005 2006 2007 2008
            print("----- End of TDengine Service Output -----\n")
            print("SMT execution terminated")
        else:
            print("WARNING: SMT did not terminate as expected")

    def run(self):
        self.startTaosService()
2009 2010 2011 2012 2013
        self._procIpcAll()  # pump/process all the messages
        if self.svcMgrThread:  # if sig handler hasn't destroyed it by now
            self.stopTaosService()  # should have started already


2014 2015 2016 2017 2018
class ServiceManagerThread:
    MAX_QUEUE_SIZE = 10000

    def __init__(self):
        self._tdeSubProcess = None
2019
        self._thread = None
2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036
        self._status = None

    def getStatus(self):
        return self._status

    def isRunning(self):
        # return self._thread and self._thread.is_alive()
        return self._status == MainExec.STATUS_RUNNING

    def isStopping(self):
        return self._status == MainExec.STATUS_STOPPING

    def isStopped(self):
        return self._status == MainExec.STATUS_STOPPED

    # Start the thread (with sub process), and wait for the sub service
    # to become fully operational
2037 2038
    def start(self):
        if self._thread:
2039
            raise RuntimeError("Unexpected _thread")
2040
        if self._tdeSubProcess:
2041 2042 2043 2044
            raise RuntimeError("TDengine sub process already created/running")

        self._status = MainExec.STATUS_STARTING

2045
        self._tdeSubProcess = TdeSubProcess()
2046 2047 2048 2049
        self._tdeSubProcess.start()

        self._ipcQueue = Queue()
        self._thread = threading.Thread(
2050
            target=self.svcOutputReader,
2051
            args=(self._tdeSubProcess.getStdOut(), self._ipcQueue))
2052
        self._thread.daemon = True  # thread dies with the program
2053 2054 2055
        self._thread.start()

        # wait for service to start
2056
        for i in range(0, 10):
2057 2058 2059
            time.sleep(1.0)
            # self.procIpcBatch() # don't pump message during start up
            print("_zz_", end="", flush=True)
2060
            if self._status == MainExec.STATUS_RUNNING:
2061
                logger.info("[] TDengine service READY to process requests")
2062 2063 2064
                return  # now we've started
        # TODO: handle this better?
        raise RuntimeError("TDengine service did not start successfully")
2065 2066 2067 2068 2069 2070

    def stop(self):
        # can be called from both main thread or signal handler
        print("Terminating TDengine service running as the sub process...")
        if self.isStopped():
            print("Service already stopped")
2071
            return
2072 2073 2074
        if self.isStopping():
            print("Service is already being stopped")
            return
2075 2076 2077 2078
        # Linux will send Control-C generated SIGINT to the TDengine process
        # already, ref:
        # https://unix.stackexchange.com/questions/176235/fork-and-how-signals-are-delivered-to-processes
        if not self._tdeSubProcess:
2079
            raise RuntimeError("sub process object missing")
2080

2081 2082 2083
        self._status = MainExec.STATUS_STOPPING
        self._tdeSubProcess.stop()

2084 2085 2086 2087
        if self._tdeSubProcess.isRunning():  # still running
            print(
                "FAILED to stop sub process, it is still running... pid = {}".format(
                    self.subProcess.pid))
2088
        else:
2089 2090 2091
            self._tdeSubProcess = None  # not running any more
            self.join()  # stop the thread, change the status, etc.

2092 2093 2094
    def join(self):
        # TODO: sanity check
        if not self.isStopping():
2095 2096 2097
            raise RuntimeError(
                "Unexpected status when ending svc mgr thread: {}".format(
                    self._status))
2098

2099
        if self._thread:
2100
            self._thread.join()
2101
            self._thread = None
2102
            self._status = MainExec.STATUS_STOPPED
S
Shuduo Sang 已提交
2103
        else:
2104
            print("Joining empty thread, doing nothing")
S
Shuduo Sang 已提交
2105

2106 2107
    def _trimQueue(self, targetSize):
        if targetSize <= 0:
2108
            return  # do nothing
2109
        q = self._ipcQueue
2110
        if (q.qsize() <= targetSize):  # no need to trim
2111 2112 2113 2114
            return

        logger.debug("Triming IPC queue to target size: {}".format(targetSize))
        itemsToTrim = q.qsize() - targetSize
2115
        for i in range(0, itemsToTrim):
2116 2117 2118
            try:
                q.get_nowait()
            except Empty:
2119 2120
                break  # break out of for loop, no more trimming

2121
    TD_READY_MSG = "TDengine is initialized successfully"
S
Shuduo Sang 已提交
2122

2123 2124
    def procIpcBatch(self, trimToTarget=0, forceOutput=False):
        self._trimQueue(trimToTarget)  # trim if necessary
S
Shuduo Sang 已提交
2125 2126
        # Process all the output generated by the underlying sub process,
        # managed by IO thread
2127
        print("<", end="", flush=True)
S
Shuduo Sang 已提交
2128 2129
        while True:
            try:
2130
                line = self._ipcQueue.get_nowait()  # getting output at fast speed
2131
                self._printProgress("_o")
2132 2133 2134
            except Empty:
                # time.sleep(2.3) # wait only if there's no output
                # no more output
2135
                print(".>", end="", flush=True)
S
Shuduo Sang 已提交
2136
                return  # we are done with THIS BATCH
2137
            else:  # got line, printing out
2138 2139 2140 2141 2142
                if forceOutput:
                    logger.info(line)
                else:
                    logger.debug(line)
        print(">", end="", flush=True)
2143

2144
    _ProgressBars = ["--", "//", "||", "\\\\"]
2145

2146 2147
    def _printProgress(self, msg):  # TODO: assuming 2 chars
        print(msg, end="", flush=True)
2148 2149 2150
        pBar = self._ProgressBars[Dice.throw(4)]
        print(pBar, end="", flush=True)
        print('\b\b\b\b', end="", flush=True)
2151

2152 2153 2154
    def svcOutputReader(self, out: IO, queue):
        # Important Reference: https://stackoverflow.com/questions/375427/non-blocking-read-on-a-subprocess-pipe-in-python
        # print("This is the svcOutput Reader...")
2155
        # for line in out :
2156 2157 2158 2159
        for line in iter(out.readline, b''):
            # print("Finished reading a line: {}".format(line))
            # print("Adding item to queue...")
            line = line.decode("utf-8").rstrip()
2160 2161
            # This might block, and then causing "out" buffer to block
            queue.put(line)
2162 2163
            self._printProgress("_i")

2164 2165 2166
            if self._status == MainExec.STATUS_STARTING:  # we are starting, let's see if we have started
                if line.find(self.TD_READY_MSG) != -1:  # found
                    self._status = MainExec.STATUS_RUNNING
2167 2168

            # Trim the queue if necessary: TODO: try this 1 out of 10 times
2169
            self._trimQueue(self.MAX_QUEUE_SIZE * 9 // 10)  # trim to 90% size
2170

2171 2172 2173
            if self.isStopping():  # TODO: use thread status instead
                # WAITING for stopping sub process to finish its outptu
                print("_w", end="", flush=True)
2174 2175

            # queue.put(line)
2176 2177
        # meaning sub process must have died
        print("\nNo more output from IO thread managing TDengine service")
2178
        out.close()
2179

2180 2181

class TdeSubProcess:
2182 2183 2184 2185 2186
    def __init__(self):
        self.subProcess = None

    def getStdOut(self):
        return self.subProcess.stdout
2187

2188
    def isRunning(self):
2189
        return self.subProcess is not None
2190

S
Shuduo Sang 已提交
2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205
    def getBuildPath(self):
        selfPath = os.path.dirname(os.path.realpath(__file__))
        if ("community" in selfPath):
            projPath = selfPath[:selfPath.find("communit")]
        else:
            projPath = selfPath[:selfPath.find("tests")]

        for root, dirs, files in os.walk(projPath):
            if ("taosd" in files):
                rootRealPath = os.path.dirname(os.path.realpath(root))
                if ("packaging" not in rootRealPath):
                    buildPath = root[:len(root) - len("/build/bin")]
                    break
        return buildPath

2206
    def start(self):
2207
        ON_POSIX = 'posix' in sys.builtin_module_names
S
Shuduo Sang 已提交
2208

2209 2210 2211
        taosdPath = self.getBuildPath() + "/build/bin/taosd"
        cfgPath = self.getBuildPath() + "/test/cfg"

S
Shuduo Sang 已提交
2212
        svcCmd = [taosdPath, '-c', cfgPath]
2213
        # svcCmd = ['vmstat', '1']
S
Shuduo Sang 已提交
2214
        if self.subProcess:  # already there
2215 2216
            raise RuntimeError("Corrupt process state")

S
Shuduo Sang 已提交
2217
        self.subProcess = subprocess.Popen(
S
Shuduo Sang 已提交
2218 2219
            svcCmd,
            stdout=subprocess.PIPE,
2220
            # bufsize=1, # not supported in binary mode
S
Shuduo Sang 已提交
2221
            close_fds=ON_POSIX)  # had text=True, which interferred with reading EOF
2222

2223
    def stop(self):
S
Shuduo Sang 已提交
2224
        if not self.subProcess:
2225
            print("Sub process already stopped")
2226 2227 2228
            return

        retCode = self.subProcess.poll()
S
Shuduo Sang 已提交
2229
        if retCode:  # valid return code, process ended
2230
            self.subProcess = None
S
Shuduo Sang 已提交
2231 2232
        else:  # process still alive, let's interrupt it
            print(
2233
                "Sub process is running, sending SIG_INT and waiting for it to terminate...")
S
Shuduo Sang 已提交
2234 2235 2236 2237
            # sub process should end, then IPC queue should end, causing IO
            # thread to end
            self.subProcess.send_signal(signal.SIGINT)
            try:
2238 2239 2240 2241
                self.subProcess.wait(10)
            except subprocess.TimeoutExpired as err:
                print("Time out waiting for TDengine service process to exit")
            else:
2242
                print("TDengine service process terminated successfully from SIG_INT")
2243 2244
                self.subProcess = None

S
Shuduo Sang 已提交
2245

2246 2247 2248 2249 2250 2251
class ClientManager:
    def __init__(self):
        print("Starting service manager")
        signal.signal(signal.SIGTERM, self.sigIntHandler)
        signal.signal(signal.SIGINT, self.sigIntHandler)

2252
        self._status = MainExec.STATUS_RUNNING
2253 2254 2255
        self.tc = None

    def sigIntHandler(self, signalNumber, frame):
2256
        if self._status != MainExec.STATUS_RUNNING:
2257
            print("Ignoring repeated SIGINT...")
S
Shuduo Sang 已提交
2258
            return  # do nothing if it's already not running
2259
        self._status = MainExec.STATUS_STOPPING  # immediately set our status
2260 2261 2262 2263

        print("Terminating program...")
        self.tc.requestToStop()

S
Shuduo Sang 已提交
2264
    def _printLastNumbers(self):  # to verify data durability
2265 2266
        dbManager = DbManager(resetDb=False)
        dbc = dbManager.getDbConn()
S
Shuduo Sang 已提交
2267
        if dbc.query("show databases") == 0:  # no databae
2268
            return
S
Shuduo Sang 已提交
2269
        if dbc.query("show tables") == 0:  # no tables
S
Steven Li 已提交
2270
            return
2271 2272

        dbc.execute("use db")
S
Shuduo Sang 已提交
2273
        sTbName = dbManager.getFixedSuperTableName()
2274 2275

        # get all regular tables
S
Shuduo Sang 已提交
2276 2277
        # TODO: analyze result set later
        dbc.query("select TBNAME from db.{}".format(sTbName))
2278 2279 2280
        rTables = dbc.getQueryResult()

        bList = TaskExecutor.BoundedList()
S
Shuduo Sang 已提交
2281
        for rTbName in rTables:  # regular tables
2282 2283
            dbc.query("select speed from db.{}".format(rTbName[0]))
            numbers = dbc.getQueryResult()
S
Shuduo Sang 已提交
2284
            for row in numbers:
2285 2286 2287 2288 2289 2290
                # print("<{}>".format(n), end="", flush=True)
                bList.add(row[0])

        print("Top numbers in DB right now: {}".format(bList))
        print("TDengine client execution is about to start in 2 seconds...")
        time.sleep(2.0)
S
Shuduo Sang 已提交
2291
        dbManager = None  # release?
2292 2293 2294 2295 2296

    def prepare(self):
        self._printLastNumbers()

    def run(self):
S
Shuduo Sang 已提交
2297
        if gConfig.auto_start_service:
2298 2299 2300
            svcMgr = SvcManager()
            svcMgr.startTaosService()

2301 2302
        self._printLastNumbers()

S
Shuduo Sang 已提交
2303
        dbManager = DbManager()  # Regular function
2304
        thPool = ThreadPool(gConfig.num_threads, gConfig.max_steps)
2305
        self.tc = ThreadCoordinator(thPool, dbManager)
S
Shuduo Sang 已提交
2306

2307
        self.tc.run()
S
Steven Li 已提交
2308 2309
        # print("exec stats: {}".format(self.tc.getExecStats()))
        # print("TC failed = {}".format(self.tc.isFailed()))
S
Shuduo Sang 已提交
2310
        if gConfig.auto_start_service:
2311
            svcMgr.stopTaosService()
2312 2313
        # Print exec status, etc., AFTER showing messages from the server
        self.conclude()
S
Steven Li 已提交
2314
        # print("TC failed (2) = {}".format(self.tc.isFailed()))
S
Shuduo Sang 已提交
2315 2316
        # Linux return code: ref https://shapeshed.com/unix-exit-codes/
        return 1 if self.tc.isFailed() else 0
2317 2318 2319

    def conclude(self):
        self.tc.printStats()
S
Shuduo Sang 已提交
2320
        self.tc.getDbManager().cleanUp()
2321 2322 2323


class MainExec:
2324 2325 2326
    STATUS_STARTING = 1
    STATUS_RUNNING = 2
    STATUS_STOPPING = 3
2327
    STATUS_STOPPED = 4
2328 2329 2330 2331

    @classmethod
    def runClient(cls):
        clientManager = ClientManager()
S
Steven Li 已提交
2332
        return clientManager.run()
2333 2334 2335

    @classmethod
    def runService(cls):
2336 2337
        svcManager = SvcManager()
        svcManager.run()
2338 2339

    @classmethod
S
Shuduo Sang 已提交
2340
    def runTemp(cls):  # for debugging purposes
2341 2342
        # # Hack to exercise reading from disk, imcreasing coverage. TODO: fix
        # dbc = dbState.getDbConn()
S
Shuduo Sang 已提交
2343
        # sTbName = dbState.getFixedSuperTableName()
2344 2345
        # dbc.execute("create database if not exists db")
        # if not dbState.getState().equals(StateEmpty()):
S
Shuduo Sang 已提交
2346
        #     dbc.execute("use db")
2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357

        # rTables = None
        # try: # the super table may not exist
        #     sql = "select TBNAME from db.{}".format(sTbName)
        #     logger.info("Finding out tables in super table: {}".format(sql))
        #     dbc.query(sql) # TODO: analyze result set later
        #     logger.info("Fetching result")
        #     rTables = dbc.getQueryResult()
        #     logger.info("Result: {}".format(rTables))
        # except taos.error.ProgrammingError as err:
        #     logger.info("Initial Super table OPS error: {}".format(err))
S
Shuduo Sang 已提交
2358

2359 2360 2361 2362 2363 2364 2365 2366
        # # sys.exit()
        # if ( not rTables == None):
        #     # print("rTables[0] = {}, type = {}".format(rTables[0], type(rTables[0])))
        #     try:
        #         for rTbName in rTables : # regular tables
        #             ds = dbState
        #             logger.info("Inserting into table: {}".format(rTbName[0]))
        #             sql = "insert into db.{} values ('{}', {});".format(
S
Shuduo Sang 已提交
2367
        #                 rTbName[0],
2368 2369
        #                 ds.getNextTick(), ds.getNextInt())
        #             dbc.execute(sql)
S
Shuduo Sang 已提交
2370
        #         for rTbName in rTables : # regular tables
2371
        #             dbc.query("select * from db.{}".format(rTbName[0])) # TODO: check success failure
S
Shuduo Sang 已提交
2372
        #         logger.info("Initial READING operation is successful")
2373
        #     except taos.error.ProgrammingError as err:
S
Shuduo Sang 已提交
2374 2375
        #         logger.info("Initial WRITE/READ error: {}".format(err))

2376 2377 2378
        # Sandbox testing code
        # dbc = dbState.getDbConn()
        # while True:
S
Shuduo Sang 已提交
2379
        #     rows = dbc.query("show databases")
2380
        #     print("Rows: {}, time={}".format(rows, time.time()))
S
Shuduo Sang 已提交
2381 2382
        return

S
Steven Li 已提交
2383

2384
def main():
S
Shuduo Sang 已提交
2385 2386
    # Super cool Python argument library:
    # https://docs.python.org/3/library/argparse.html
2387 2388 2389 2390 2391 2392 2393 2394 2395
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent('''\
            TDengine Auto Crash Generator (PLEASE NOTICE the Prerequisites Below)
            ---------------------------------------------------------------------
            1. You build TDengine in the top level ./build directory, as described in offical docs
            2. You run the server there before this script: ./build/bin/taosd -c test/cfg

            '''))
2396

S
Shuduo Sang 已提交
2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447
    parser.add_argument(
        '-a',
        '--auto-start-service',
        action='store_true',
        help='Automatically start/stop the TDengine service (default: false)')
    parser.add_argument(
        '-c',
        '--connector-type',
        action='store',
        default='native',
        type=str,
        help='Connector type to use: native, rest, or mixed (default: 10)')
    parser.add_argument(
        '-d',
        '--debug',
        action='store_true',
        help='Turn on DEBUG mode for more logging (default: false)')
    parser.add_argument(
        '-e',
        '--run-tdengine',
        action='store_true',
        help='Run TDengine service in foreground (default: false)')
    parser.add_argument(
        '-l',
        '--larger-data',
        action='store_true',
        help='Write larger amount of data during write operations (default: false)')
    parser.add_argument(
        '-p',
        '--per-thread-db-connection',
        action='store_true',
        help='Use a single shared db connection (default: false)')
    parser.add_argument(
        '-r',
        '--record-ops',
        action='store_true',
        help='Use a pair of always-fsynced fils to record operations performing + performed, for power-off tests (default: false)')
    parser.add_argument(
        '-s',
        '--max-steps',
        action='store',
        default=1000,
        type=int,
        help='Maximum number of steps to run (default: 100)')
    parser.add_argument(
        '-t',
        '--num-threads',
        action='store',
        default=5,
        type=int,
        help='Number of threads to run (default: 10)')
2448 2449 2450 2451 2452
    parser.add_argument(
        '-x',
        '--continue-on-exception',
        action='store_true',
        help='Continue execution after encountering unexpected/disallowed errors/exceptions (default: false)')
2453

2454
    global gConfig
2455
    gConfig = parser.parse_args()
2456

2457
    # Logging Stuff
2458
    global logger
S
Shuduo Sang 已提交
2459 2460
    _logger = logging.getLogger('CrashGen')  # real logger
    _logger.addFilter(LoggingFilter())
2461 2462 2463
    ch = logging.StreamHandler()
    _logger.addHandler(ch)

S
Shuduo Sang 已提交
2464 2465
    # Logging adapter, to be used as a logger
    logger = MyLoggingAdapter(_logger, [])
2466

S
Shuduo Sang 已提交
2467 2468
    if (gConfig.debug):
        logger.setLevel(logging.DEBUG)  # default seems to be INFO
S
Steven Li 已提交
2469 2470
    else:
        logger.setLevel(logging.INFO)
S
Shuduo Sang 已提交
2471

2472 2473
    Dice.seed(0)  # initial seeding of dice

2474
    # Run server or client
S
Shuduo Sang 已提交
2475
    if gConfig.run_tdengine:  # run server
2476
        MainExec.runService()
S
Shuduo Sang 已提交
2477
    else:
S
Steven Li 已提交
2478
        return MainExec.runClient()
2479

S
Shuduo Sang 已提交
2480

2481
if __name__ == "__main__":
S
Steven Li 已提交
2482 2483 2484
    exitCode = main()
    # print("Exiting with code: {}".format(exitCode))
    sys.exit(exitCode)