crash_gen.py 85.6 KB
Newer Older
S
Shuduo Sang 已提交
1
# -----!/usr/bin/python3.7
S
Steven Li 已提交
2 3 4 5 6 7 8 9 10 11 12 13
###################################################################
#           Copyright (c) 2016 by TAOS Technologies, Inc.
#                     All rights reserved.
#
#  This file is proprietary and confidential to TAOS Technologies.
#  No part of this file may be reproduced, stored, transmitted,
#  disclosed or used in any form or by any means other than as
#  expressly provided by the written permission from Jianhui Tao
#
###################################################################

# -*- coding: utf-8 -*-
S
Shuduo Sang 已提交
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
# For type hinting before definition, ref:
# https://stackoverflow.com/questions/33533148/how-do-i-specify-that-the-return-type-of-a-method-is-the-same-as-the-class-itsel
from __future__ import annotations
import taos
import crash_gen
from util.sql import *
from util.cases import *
from util.dnodes import *
from util.log import *
from queue import Queue, Empty
from typing import IO
from typing import Set
from typing import Dict
from typing import List
from requests.auth import HTTPBasicAuth
import textwrap
import datetime
import logging
import time
import random
import threading
import requests
import copy
import argparse
import getopt
39

S
Steven Li 已提交
40
import sys
41
import os
42 43
import io
import signal
44
import traceback
45 46 47 48
# Require Python 3
if sys.version_info[0] < 3:
    raise Exception("Must be using Python 3")

S
Steven Li 已提交
49

S
Shuduo Sang 已提交
50
# Global variables, tried to keep a small number.
51 52 53

# Command-line/Environment Configurations, will set a bit later
# ConfigNameSpace = argparse.Namespace
S
Shuduo Sang 已提交
54
gConfig = argparse.Namespace()  # Dummy value, will be replaced later
55
logger = None
S
Steven Li 已提交
56

S
Shuduo Sang 已提交
57 58

def runThread(wt: WorkerThread):
59
    wt.run()
60

S
Shuduo Sang 已提交
61

62 63
class CrashGenError(Exception):
    def __init__(self, msg=None, errno=None):
S
Shuduo Sang 已提交
64
        self.msg = msg
65
        self.errno = errno
S
Shuduo Sang 已提交
66

67 68 69
    def __str__(self):
        return self.msg

S
Shuduo Sang 已提交
70

S
Steven Li 已提交
71
class WorkerThread:
S
Shuduo Sang 已提交
72 73 74 75 76
    def __init__(self, pool: ThreadPool, tid,
                 tc: ThreadCoordinator,
                 # te: TaskExecutor,
                 ):  # note: main thread context!
        # self._curStep = -1
77
        self._pool = pool
S
Shuduo Sang 已提交
78 79
        self._tid = tid
        self._tc = tc  # type: ThreadCoordinator
S
Steven Li 已提交
80
        # self.threadIdent = threading.get_ident()
81 82
        self._thread = threading.Thread(target=runThread, args=(self,))
        self._stepGate = threading.Event()
S
Steven Li 已提交
83

84
        # Let us have a DB connection of our own
S
Shuduo Sang 已提交
85
        if (gConfig.per_thread_db_connection):  # type: ignore
86
            # print("connector_type = {}".format(gConfig.connector_type))
S
Shuduo Sang 已提交
87 88
            self._dbConn = DbConn.createNative() if (
                gConfig.connector_type == 'native') else DbConn.createRest()
89

S
Shuduo Sang 已提交
90
        self._dbInUse = False  # if "use db" was executed already
91

92
    def logDebug(self, msg):
S
Steven Li 已提交
93
        logger.debug("    TRD[{}] {}".format(self._tid, msg))
94 95

    def logInfo(self, msg):
S
Steven Li 已提交
96
        logger.info("    TRD[{}] {}".format(self._tid, msg))
97

98 99 100 101
    def dbInUse(self):
        return self._dbInUse

    def useDb(self):
S
Shuduo Sang 已提交
102
        if (not self._dbInUse):
103 104 105
            self.execSql("use db")
        self._dbInUse = True

106
    def getTaskExecutor(self):
S
Shuduo Sang 已提交
107
        return self._tc.getTaskExecutor()
108

S
Steven Li 已提交
109
    def start(self):
110
        self._thread.start()  # AFTER the thread is recorded
S
Steven Li 已提交
111

S
Shuduo Sang 已提交
112
    def run(self):
S
Steven Li 已提交
113
        # initialization after thread starts, in the thread context
114
        # self.isSleeping = False
115 116
        logger.info("Starting to run thread: {}".format(self._tid))

S
Shuduo Sang 已提交
117
        if (gConfig.per_thread_db_connection):  # type: ignore
118
            logger.debug("Worker thread openning database connection")
119
            self._dbConn.open()
S
Steven Li 已提交
120

S
Shuduo Sang 已提交
121 122
        self._doTaskLoop()

123
        # clean up
S
Shuduo Sang 已提交
124
        if (gConfig.per_thread_db_connection):  # type: ignore
125
            self._dbConn.close()
126

S
Shuduo Sang 已提交
127
    def _doTaskLoop(self):
128 129
        # while self._curStep < self._pool.maxSteps:
        # tc = ThreadCoordinator(None)
S
Shuduo Sang 已提交
130 131
        while True:
            tc = self._tc  # Thread Coordinator, the overall master
132
            tc.crossStepBarrier()  # shared barrier first, INCLUDING the last one
S
Shuduo Sang 已提交
133 134 135
            logger.debug(
                "[TRD] Worker thread [{}] exited barrier...".format(
                    self._tid))
136
            self.crossStepGate()   # then per-thread gate, after being tapped
S
Shuduo Sang 已提交
137 138 139
            logger.debug(
                "[TRD] Worker thread [{}] exited step gate...".format(
                    self._tid))
140
            if not self._tc.isRunning():
S
Shuduo Sang 已提交
141 142
                logger.debug(
                    "[TRD] Thread Coordinator not running any more, worker thread now stopping...")
143 144
                break

145
            # Fetch a task from the Thread Coordinator
S
Shuduo Sang 已提交
146 147 148
            logger.debug(
                "[TRD] Worker thread [{}] about to fetch task".format(
                    self._tid))
149
            task = tc.fetchTask()
150 151

            # Execute such a task
S
Shuduo Sang 已提交
152 153 154
            logger.debug(
                "[TRD] Worker thread [{}] about to execute task: {}".format(
                    self._tid, task.__class__.__name__))
155
            task.execute(self)
156
            tc.saveExecutedTask(task)
S
Shuduo Sang 已提交
157 158 159 160 161
            logger.debug(
                "[TRD] Worker thread [{}] finished executing task".format(
                    self._tid))

            self._dbInUse = False  # there may be changes between steps
162

S
Shuduo Sang 已提交
163 164
    def verifyThreadSelf(self):  # ensure we are called by this own thread
        if (threading.get_ident() != self._thread.ident):
S
Steven Li 已提交
165 166
            raise RuntimeError("Unexpectly called from other threads")

S
Shuduo Sang 已提交
167 168
    def verifyThreadMain(self):  # ensure we are called by the main thread
        if (threading.get_ident() != threading.main_thread().ident):
S
Steven Li 已提交
169 170 171
            raise RuntimeError("Unexpectly called from other threads")

    def verifyThreadAlive(self):
S
Shuduo Sang 已提交
172
        if (not self._thread.is_alive()):
S
Steven Li 已提交
173 174
            raise RuntimeError("Unexpected dead thread")

175
    # A gate is different from a barrier in that a thread needs to be "tapped"
S
Steven Li 已提交
176 177
    def crossStepGate(self):
        self.verifyThreadAlive()
S
Shuduo Sang 已提交
178 179
        self.verifyThreadSelf()  # only allowed by ourselves

180
        # Wait again at the "gate", waiting to be "tapped"
S
Shuduo Sang 已提交
181 182 183 184
        logger.debug(
            "[TRD] Worker thread {} about to cross the step gate".format(
                self._tid))
        self._stepGate.wait()
185
        self._stepGate.clear()
S
Shuduo Sang 已提交
186

187
        # self._curStep += 1  # off to a new step...
S
Steven Li 已提交
188

S
Shuduo Sang 已提交
189
    def tapStepGate(self):  # give it a tap, release the thread waiting there
S
Steven Li 已提交
190
        self.verifyThreadAlive()
S
Shuduo Sang 已提交
191 192
        self.verifyThreadMain()  # only allowed for main thread

S
Steven Li 已提交
193
        logger.debug("[TRD] Tapping worker thread {}".format(self._tid))
S
Shuduo Sang 已提交
194 195
        self._stepGate.set()  # wake up!
        time.sleep(0)  # let the released thread run a bit
196

S
Shuduo Sang 已提交
197 198 199
    def execSql(self, sql):  # TODO: expose DbConn directly
        if (gConfig.per_thread_db_connection):
            return self._dbConn.execute(sql)
200
        else:
201
            return self._tc.getDbManager().getDbConn().execute(sql)
202

S
Shuduo Sang 已提交
203 204 205
    def querySql(self, sql):  # TODO: expose DbConn directly
        if (gConfig.per_thread_db_connection):
            return self._dbConn.query(sql)
206 207 208 209
        else:
            return self._tc.getDbManager().getDbConn().query(sql)

    def getQueryResult(self):
S
Shuduo Sang 已提交
210 211
        if (gConfig.per_thread_db_connection):
            return self._dbConn.getQueryResult()
212 213 214
        else:
            return self._tc.getDbManager().getDbConn().getQueryResult()

215
    def getDbConn(self):
S
Shuduo Sang 已提交
216 217
        if (gConfig.per_thread_db_connection):
            return self._dbConn
218
        else:
219
            return self._tc.getDbManager().getDbConn()
220

221 222
    # def querySql(self, sql): # not "execute", since we are out side the DB context
    #     if ( gConfig.per_thread_db_connection ):
S
Shuduo Sang 已提交
223
    #         return self._dbConn.query(sql)
224 225
    #     else:
    #         return self._tc.getDbState().getDbConn().query(sql)
226

227
# The coordinator of all worker threads, mostly running in main thread
S
Shuduo Sang 已提交
228 229


230
class ThreadCoordinator:
231
    def __init__(self, pool: ThreadPool, dbManager):
S
Shuduo Sang 已提交
232
        self._curStep = -1  # first step is 0
233
        self._pool = pool
234
        # self._wd = wd
S
Shuduo Sang 已提交
235
        self._te = None  # prepare for every new step
236
        self._dbManager = dbManager
S
Shuduo Sang 已提交
237 238
        self._executedTasks: List[Task] = []  # in a given step
        self._lock = threading.RLock()  # sync access for a few things
S
Steven Li 已提交
239

S
Shuduo Sang 已提交
240 241
        self._stepBarrier = threading.Barrier(
            self._pool.numThreads + 1)  # one barrier for all threads
242
        self._execStats = ExecutionStats()
243
        self._runStatus = MainExec.STATUS_RUNNING
S
Steven Li 已提交
244

245 246 247
    def getTaskExecutor(self):
        return self._te

S
Shuduo Sang 已提交
248
    def getDbManager(self) -> DbManager:
249
        return self._dbManager
250

251 252 253
    def crossStepBarrier(self):
        self._stepBarrier.wait()

254 255 256 257
    def requestToStop(self):
        self._runStatus = MainExec.STATUS_STOPPING
        self._execStats.registerFailure("User Interruption")

S
Shuduo Sang 已提交
258
    def run(self):
259
        self._pool.createAndStartThreads(self)
S
Steven Li 已提交
260 261

        # Coordinate all threads step by step
S
Shuduo Sang 已提交
262 263 264
        self._curStep = -1  # not started yet
        maxSteps = gConfig.max_steps  # type: ignore
        self._execStats.startExec()  # start the stop watch
265 266
        transitionFailed = False
        hasAbortedTask = False
S
Shuduo Sang 已提交
267 268 269 270 271 272 273 274
        while(self._curStep < maxSteps - 1 and
              (not transitionFailed) and
              (self._runStatus == MainExec.STATUS_RUNNING) and
                (not hasAbortedTask)):  # maxStep==10, last curStep should be 9

            if not gConfig.debug:
                # print this only if we are not in debug mode
                print(".", end="", flush=True)
S
Steven Li 已提交
275
            logger.debug("[TRD] Main thread going to sleep")
276

277
            # Now main thread (that's us) is ready to enter a step
S
Shuduo Sang 已提交
278 279 280 281
            # let other threads go past the pool barrier, but wait at the
            # thread gate
            self.crossStepBarrier()
            self._stepBarrier.reset()  # Other worker threads should now be at the "gate"
282 283

            # At this point, all threads should be pass the overall "barrier" and before the per-thread "gate"
S
Shuduo Sang 已提交
284 285
            # We use this period to do house keeping work, when all worker
            # threads are QUIET.
286
            hasAbortedTask = False
S
Shuduo Sang 已提交
287 288
            for task in self._executedTasks:
                if task.isAborted():
289 290 291 292
                    print("Task aborted: {}".format(task))
                    hasAbortedTask = True
                    break

S
Shuduo Sang 已提交
293
            if hasAbortedTask:  # do transition only if tasks are error free
294
                self._execStats.registerFailure("Aborted Task Encountered")
S
Shuduo Sang 已提交
295
            else:
296 297 298
                try:
                    sm = self._dbManager.getStateMachine()
                    logger.debug("[STT] starting transitions")
S
Shuduo Sang 已提交
299 300
                    # at end of step, transiton the DB state
                    sm.transition(self._executedTasks)
301
                    logger.debug("[STT] transition ended")
S
Shuduo Sang 已提交
302 303 304
                    # Due to limitation (or maybe not) of the Python library,
                    # we cannot share connections across threads
                    if sm.hasDatabase():
305 306 307
                        for t in self._pool.threadList:
                            logger.debug("[DB] use db for all worker threads")
                            t.useDb()
S
Shuduo Sang 已提交
308 309
                            # t.execSql("use db") # main thread executing "use
                            # db" on behalf of every worker thread
310
                except taos.error.ProgrammingError as err:
S
Shuduo Sang 已提交
311
                    if (err.msg == 'network unavailable'):  # broken DB connection
312 313 314
                        logger.info("DB connection broken, execution failed")
                        traceback.print_stack()
                        transitionFailed = True
S
Shuduo Sang 已提交
315
                        self._te = None  # Not running any more
316
                        self._execStats.registerFailure("Broken DB Connection")
S
Shuduo Sang 已提交
317 318
                        # continue # don't do that, need to tap all threads at
                        # end, and maybe signal them to stop
319
                    else:
S
Shuduo Sang 已提交
320
                        raise
321 322
            # finally:
            #     pass
S
Shuduo Sang 已提交
323 324

            self.resetExecutedTasks()  # clear the tasks after we are done
325 326

            # Get ready for next step
S
Steven Li 已提交
327
            logger.debug("<-- Step {} finished".format(self._curStep))
S
Shuduo Sang 已提交
328 329 330 331
            self._curStep += 1  # we are about to get into next step. TODO: race condition here!
            # Now not all threads had time to go to sleep
            logger.debug(
                "\r\n\n--> Step {} starts with main thread waking up".format(self._curStep))
332

333
            # A new TE for the new step
S
Shuduo Sang 已提交
334
            if not transitionFailed:  # only if not failed
335
                self._te = TaskExecutor(self._curStep)
336

S
Shuduo Sang 已提交
337 338 339 340 341 342
            logger.debug(
                "[TRD] Main thread waking up at step {}, tapping worker threads".format(
                    self._curStep))  # Now not all threads had time to go to sleep
            # Worker threads will wake up at this point, and each execute it's
            # own task
            self.tapAllThreads()
S
Steven Li 已提交
343

344
        logger.debug("Main thread ready to finish up...")
S
Shuduo Sang 已提交
345 346
        if not transitionFailed:  # only in regular situations
            self.crossStepBarrier()  # Cross it one last time, after all threads finish
347 348
            self._stepBarrier.reset()
            logger.debug("Main thread in exclusive zone...")
S
Shuduo Sang 已提交
349
            self._te = None  # No more executor, time to end
350
            logger.debug("Main thread tapping all threads one last time...")
S
Shuduo Sang 已提交
351
            self.tapAllThreads()  # Let the threads run one last time
352

353
        logger.debug("Main thread joining all threads")
S
Shuduo Sang 已提交
354
        self._pool.joinAll()  # Get all threads to finish
355
        logger.info("\nAll worker threads finished")
356 357
        self._execStats.endExec()

358 359
    def printStats(self):
        self._execStats.printStats()
S
Steven Li 已提交
360

S
Steven Li 已提交
361 362 363 364 365 366
    def isFailed(self):
        return self._execStats.isFailed()

    def getExecStats(self):
        return self._execStats

S
Shuduo Sang 已提交
367
    def tapAllThreads(self):  # in a deterministic manner
S
Steven Li 已提交
368
        wakeSeq = []
S
Shuduo Sang 已提交
369 370
        for i in range(self._pool.numThreads):  # generate a random sequence
            if Dice.throw(2) == 1:
S
Steven Li 已提交
371 372 373
                wakeSeq.append(i)
            else:
                wakeSeq.insert(0, i)
S
Shuduo Sang 已提交
374 375 376
        logger.debug(
            "[TRD] Main thread waking up worker threads: {}".format(
                str(wakeSeq)))
377
        # TODO: set dice seed to a deterministic value
S
Steven Li 已提交
378
        for i in wakeSeq:
S
Shuduo Sang 已提交
379 380 381
            # TODO: maybe a bit too deep?!
            self._pool.threadList[i].tapStepGate()
            time.sleep(0)  # yield
S
Steven Li 已提交
382

383
    def isRunning(self):
S
Shuduo Sang 已提交
384
        return self._te is not None
385

S
Shuduo Sang 已提交
386 387
    def fetchTask(self) -> Task:
        if (not self.isRunning()):  # no task
388
            raise RuntimeError("Cannot fetch task when not running")
389 390
        # return self._wd.pickTask()
        # Alternatively, let's ask the DbState for the appropriate task
391 392 393 394 395 396 397
        # dbState = self.getDbState()
        # tasks = dbState.getTasksAtState() # TODO: create every time?
        # nTasks = len(tasks)
        # i = Dice.throw(nTasks)
        # logger.debug(" (dice:{}/{}) ".format(i, nTasks))
        # # return copy.copy(tasks[i]) # Needs a fresh copy, to save execution results, etc.
        # return tasks[i].clone() # TODO: still necessary?
S
Shuduo Sang 已提交
398 399 400 401 402
        # pick a task type for current state
        taskType = self.getDbManager().getStateMachine().pickTaskType()
        return taskType(
            self.getDbManager(),
            self._execStats)  # create a task from it
403 404

    def resetExecutedTasks(self):
S
Shuduo Sang 已提交
405
        self._executedTasks = []  # should be under single thread
406 407 408 409

    def saveExecutedTask(self, task):
        with self._lock:
            self._executedTasks.append(task)
410 411

# We define a class to run a number of threads in locking steps.
S
Shuduo Sang 已提交
412 413


414
class ThreadPool:
415
    def __init__(self, numThreads, maxSteps):
416 417 418 419
        self.numThreads = numThreads
        self.maxSteps = maxSteps
        # Internal class variables
        self.curStep = 0
S
Shuduo Sang 已提交
420 421
        self.threadList = []  # type: List[WorkerThread]

422
    # starting to run all the threads, in locking steps
423
    def createAndStartThreads(self, tc: ThreadCoordinator):
S
Shuduo Sang 已提交
424 425
        for tid in range(0, self.numThreads):  # Create the threads
            workerThread = WorkerThread(self, tid, tc)
426
            self.threadList.append(workerThread)
S
Shuduo Sang 已提交
427
            workerThread.start()  # start, but should block immediately before step 0
428 429 430 431 432 433

    def joinAll(self):
        for workerThread in self.threadList:
            logger.debug("Joining thread...")
            workerThread._thread.join()

434 435
# A queue of continguous POSITIVE integers, used by DbManager to generate continuous numbers
# for new table names
S
Shuduo Sang 已提交
436 437


S
Steven Li 已提交
438 439
class LinearQueue():
    def __init__(self):
440
        self.firstIndex = 1  # 1st ever element
S
Steven Li 已提交
441
        self.lastIndex = 0
S
Shuduo Sang 已提交
442 443
        self._lock = threading.RLock()  # our functions may call each other
        self.inUse = set()  # the indexes that are in use right now
S
Steven Li 已提交
444

445
    def toText(self):
S
Shuduo Sang 已提交
446 447
        return "[{}..{}], in use: {}".format(
            self.firstIndex, self.lastIndex, self.inUse)
448 449

    # Push (add new element, largest) to the tail, and mark it in use
S
Shuduo Sang 已提交
450
    def push(self):
451
        with self._lock:
S
Shuduo Sang 已提交
452 453
            # if ( self.isEmpty() ):
            #     self.lastIndex = self.firstIndex
454
            #     return self.firstIndex
455 456
            # Otherwise we have something
            self.lastIndex += 1
457 458
            self.allocate(self.lastIndex)
            # self.inUse.add(self.lastIndex) # mark it in use immediately
459
            return self.lastIndex
S
Steven Li 已提交
460 461

    def pop(self):
462
        with self._lock:
S
Shuduo Sang 已提交
463 464 465 466
            if (self.isEmpty()):
                # raise RuntimeError("Cannot pop an empty queue")
                return False  # TODO: None?

467
            index = self.firstIndex
S
Shuduo Sang 已提交
468
            if (index in self.inUse):
469 470
                return False

471 472 473 474 475 476 477
            self.firstIndex += 1
            return index

    def isEmpty(self):
        return self.firstIndex > self.lastIndex

    def popIfNotEmpty(self):
478
        with self._lock:
479 480 481 482
            if (self.isEmpty()):
                return 0
            return self.pop()

S
Steven Li 已提交
483
    def allocate(self, i):
484
        with self._lock:
485
            # logger.debug("LQ allocating item {}".format(i))
S
Shuduo Sang 已提交
486 487 488
            if (i in self.inUse):
                raise RuntimeError(
                    "Cannot re-use same index in queue: {}".format(i))
489 490
            self.inUse.add(i)

S
Steven Li 已提交
491
    def release(self, i):
492
        with self._lock:
493
            # logger.debug("LQ releasing item {}".format(i))
S
Shuduo Sang 已提交
494
            self.inUse.remove(i)  # KeyError possible, TODO: why?
495 496 497 498

    def size(self):
        return self.lastIndex + 1 - self.firstIndex

S
Steven Li 已提交
499
    def pickAndAllocate(self):
S
Shuduo Sang 已提交
500
        if (self.isEmpty()):
501 502
            return None
        with self._lock:
S
Shuduo Sang 已提交
503
            cnt = 0  # counting the interations
504 505
            while True:
                cnt += 1
S
Shuduo Sang 已提交
506
                if (cnt > self.size() * 10):  # 10x iteration already
507 508
                    # raise RuntimeError("Failed to allocate LinearQueue element")
                    return None
S
Shuduo Sang 已提交
509 510
                ret = Dice.throwRange(self.firstIndex, self.lastIndex + 1)
                if (ret not in self.inUse):
511 512 513
                    self.allocate(ret)
                    return ret

S
Shuduo Sang 已提交
514

515
class DbConn:
516 517 518 519 520 521 522 523 524 525 526
    TYPE_NATIVE = "native-c"
    TYPE_REST = "rest-api"
    TYPE_INVALID = "invalid"

    @classmethod
    def create(cls, connType):
        if connType == cls.TYPE_NATIVE:
            return DbConnNative()
        elif connType == cls.TYPE_REST:
            return DbConnRest()
        else:
S
Shuduo Sang 已提交
527 528
            raise RuntimeError(
                "Unexpected connection type: {}".format(connType))
529 530 531 532 533 534 535 536 537

    @classmethod
    def createNative(cls):
        return cls.create(cls.TYPE_NATIVE)

    @classmethod
    def createRest(cls):
        return cls.create(cls.TYPE_REST)

538 539
    def __init__(self):
        self.isOpen = False
540 541 542
        self._type = self.TYPE_INVALID

    def open(self):
S
Shuduo Sang 已提交
543
        if (self.isOpen):
544 545
            raise RuntimeError("Cannot re-open an existing DB connection")

546 547
        # below implemented by child classes
        self.openByType()
548

S
Shuduo Sang 已提交
549 550 551
        logger.debug(
            "[DB] data connection opened, type = {}".format(
                self._type))
552 553
        self.isOpen = True

S
Shuduo Sang 已提交
554 555 556 557
    def resetDb(self):  # reset the whole database, etc.
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot reset database until connection is open")
558 559
        # self._tdSql.prepare() # Recreate database, etc.

560
        self.execute('drop database if exists db')
561 562
        logger.debug("Resetting DB, dropped database")
        # self._cursor.execute('create database db')
563
        # self._cursor.execute('use db')
564 565
        # tdSql.execute('show databases')

S
Shuduo Sang 已提交
566
    def queryScalar(self, sql) -> int:
567 568
        return self._queryAny(sql)

S
Shuduo Sang 已提交
569
    def queryString(self, sql) -> str:
570 571
        return self._queryAny(sql)

S
Shuduo Sang 已提交
572 573 574 575
    def _queryAny(self, sql):  # actual query result as an int
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot query database until connection is open")
576
        nRows = self.query(sql)
S
Shuduo Sang 已提交
577 578 579 580
        if nRows != 1:
            raise RuntimeError(
                "Unexpected result for query: {}, rows = {}".format(
                    sql, nRows))
581
        if self.getResultRows() != 1 or self.getResultCols() != 1:
S
Shuduo Sang 已提交
582 583
            raise RuntimeError(
                "Unexpected result set for query: {}".format(sql))
584 585 586 587
        return self.getQueryResult()[0][0]

    def execute(self, sql):
        raise RuntimeError("Unexpected execution, should be overriden")
S
Shuduo Sang 已提交
588

589 590
    def openByType(self):
        raise RuntimeError("Unexpected execution, should be overriden")
S
Shuduo Sang 已提交
591

592 593
    def getQueryResult(self):
        raise RuntimeError("Unexpected execution, should be overriden")
S
Shuduo Sang 已提交
594

595 596
    def getResultRows(self):
        raise RuntimeError("Unexpected execution, should be overriden")
S
Shuduo Sang 已提交
597

598 599 600 601
    def getResultCols(self):
        raise RuntimeError("Unexpected execution, should be overriden")

# Sample: curl -u root:taosdata -d "show databases" localhost:6020/rest/sql
S
Shuduo Sang 已提交
602 603


604 605 606 607
class DbConnRest(DbConn):
    def __init__(self):
        super().__init__()
        self._type = self.TYPE_REST
S
Shuduo Sang 已提交
608
        self._url = "http://localhost:6020/rest/sql"  # fixed for now
609 610
        self._result = None

S
Shuduo Sang 已提交
611 612 613
    def openByType(self):  # Open connection
        pass  # do nothing, always open

614
    def close(self):
S
Shuduo Sang 已提交
615 616 617
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot clean up database until connection is open")
618 619 620 621 622
        # Do nothing for REST
        logger.debug("[DB] REST Database connection closed")
        self.isOpen = False

    def _doSql(self, sql):
S
Shuduo Sang 已提交
623 624 625
        r = requests.post(self._url,
                          data=sql,
                          auth=HTTPBasicAuth('root', 'taosdata'))
626 627
        rj = r.json()
        # Sanity check for the "Json Result"
S
Shuduo Sang 已提交
628
        if ('status' not in rj):
629 630
            raise RuntimeError("No status in REST response")

S
Shuduo Sang 已提交
631 632 633 634
        if rj['status'] == 'error':  # clearly reported error
            if ('code' not in rj):  # error without code
                raise RuntimeError("REST error return without code")
            errno = rj['code']  # May need to massage this in the future
635
            # print("Raising programming error with REST return: {}".format(rj))
S
Shuduo Sang 已提交
636 637
            raise taos.error.ProgrammingError(
                rj['desc'], errno)  # todo: check existance of 'desc'
638

S
Shuduo Sang 已提交
639 640 641 642
        if rj['status'] != 'succ':  # better be this
            raise RuntimeError(
                "Unexpected REST return status: {}".format(
                    rj['status']))
643 644

        nRows = rj['rows'] if ('rows' in rj) else 0
S
Shuduo Sang 已提交
645
        self._result = rj
646 647
        return nRows

S
Shuduo Sang 已提交
648 649 650 651
    def execute(self, sql):
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot execute database commands until connection is open")
652 653
        logger.debug("[SQL-REST] Executing SQL: {}".format(sql))
        nRows = self._doSql(sql)
S
Shuduo Sang 已提交
654 655
        logger.debug(
            "[SQL-REST] Execution Result, nRows = {}, SQL = {}".format(nRows, sql))
656 657
        return nRows

S
Shuduo Sang 已提交
658
    def query(self, sql):  # return rows affected
659 660 661 662 663 664 665 666 667 668 669 670 671
        return self.execute(sql)

    def getQueryResult(self):
        return self._result['data']

    def getResultRows(self):
        print(self._result)
        raise RuntimeError("TBD")
        # return self._tdSql.queryRows

    def getResultCols(self):
        print(self._result)
        raise RuntimeError("TBD")
S
Shuduo Sang 已提交
672 673


674 675 676 677
class DbConnNative(DbConn):
    def __init__(self):
        super().__init__()
        self._type = self.TYPE_REST
S
Shuduo Sang 已提交
678
        self._conn = None
679
        self._cursor = None
S
Shuduo Sang 已提交
680

681 682 683 684 685 686 687 688 689 690 691
    def getBuildPath(self):
        selfPath = os.path.dirname(os.path.realpath(__file__))
        if ("community" in selfPath):
            projPath = selfPath[:selfPath.find("communit")]
        else:
            projPath = selfPath[:selfPath.find("tests")]

        for root, dirs, files in os.walk(projPath):
            if ("taosd" in files):
                rootRealPath = os.path.dirname(os.path.realpath(root))
                if ("packaging" not in rootRealPath):
S
Shuduo Sang 已提交
692
                    buildPath = root[:len(root) - len("/build/bin")]
693 694 695
                    break
        return buildPath

S
Shuduo Sang 已提交
696
    def openByType(self):  # Open connection
697
        cfgPath = self.getBuildPath() + "/test/cfg"
S
Shuduo Sang 已提交
698 699 700
        self._conn = taos.connect(
            host="127.0.0.1",
            config=cfgPath)  # TODO: make configurable
701 702 703 704
        self._cursor = self._conn.cursor()

        # Get the connection/cursor ready
        self._cursor.execute('reset query cache')
S
Shuduo Sang 已提交
705 706
        # self._cursor.execute('use db') # do this at the beginning of every
        # step
707 708 709 710

        # Open connection
        self._tdSql = TDSql()
        self._tdSql.init(self._cursor)
S
Shuduo Sang 已提交
711

712
    def close(self):
S
Shuduo Sang 已提交
713 714 715
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot clean up database until connection is open")
716
        self._tdSql.close()
717
        logger.debug("[DB] Database connection closed")
718
        self.isOpen = False
S
Steven Li 已提交
719

S
Shuduo Sang 已提交
720 721 722 723
    def execute(self, sql):
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot execute database commands until connection is open")
724 725
        logger.debug("[SQL] Executing SQL: {}".format(sql))
        nRows = self._tdSql.execute(sql)
S
Shuduo Sang 已提交
726 727 728
        logger.debug(
            "[SQL] Execution Result, nRows = {}, SQL = {}".format(
                nRows, sql))
729
        return nRows
S
Steven Li 已提交
730

S
Shuduo Sang 已提交
731 732 733 734
    def query(self, sql):  # return rows affected
        if (not self.isOpen):
            raise RuntimeError(
                "Cannot query database until connection is open")
735 736
        logger.debug("[SQL] Executing SQL: {}".format(sql))
        nRows = self._tdSql.query(sql)
S
Shuduo Sang 已提交
737 738 739
        logger.debug(
            "[SQL] Query Result, nRows = {}, SQL = {}".format(
                nRows, sql))
740
        return nRows
741
        # results are in: return self._tdSql.queryResult
742

743 744 745
    def getQueryResult(self):
        return self._tdSql.queryResult

746 747
    def getResultRows(self):
        return self._tdSql.queryRows
748

749 750
    def getResultCols(self):
        return self._tdSql.queryCols
751

S
Shuduo Sang 已提交
752

753
class AnyState:
S
Shuduo Sang 已提交
754 755 756
    STATE_INVALID = -1
    STATE_EMPTY = 0  # nothing there, no even a DB
    STATE_DB_ONLY = 1  # we have a DB, but nothing else
757
    STATE_TABLE_ONLY = 2  # we have a table, but totally empty
S
Shuduo Sang 已提交
758
    STATE_HAS_DATA = 3  # we have some data in the table
759 760 761 762 763
    _stateNames = ["Invalid", "Empty", "DB_Only", "Table_Only", "Has_Data"]

    STATE_VAL_IDX = 0
    CAN_CREATE_DB = 1
    CAN_DROP_DB = 2
764 765
    CAN_CREATE_FIXED_SUPER_TABLE = 3
    CAN_DROP_FIXED_SUPER_TABLE = 4
766 767 768 769 770 771 772
    CAN_ADD_DATA = 5
    CAN_READ_DATA = 6

    def __init__(self):
        self._info = self.getInfo()

    def __str__(self):
S
Shuduo Sang 已提交
773 774
        # -1 hack to accomodate the STATE_INVALID case
        return self._stateNames[self._info[self.STATE_VAL_IDX] + 1]
775 776 777 778

    def getInfo(self):
        raise RuntimeError("Must be overriden by child classes")

S
Steven Li 已提交
779 780 781 782 783 784
    def equals(self, other):
        if isinstance(other, int):
            return self.getValIndex() == other
        elif isinstance(other, AnyState):
            return self.getValIndex() == other.getValIndex()
        else:
S
Shuduo Sang 已提交
785 786 787
            raise RuntimeError(
                "Unexpected comparison, type = {}".format(
                    type(other)))
S
Steven Li 已提交
788

789 790 791
    def verifyTasksToState(self, tasks, newState):
        raise RuntimeError("Must be overriden by child classes")

S
Steven Li 已提交
792 793 794
    def getValIndex(self):
        return self._info[self.STATE_VAL_IDX]

795 796
    def getValue(self):
        return self._info[self.STATE_VAL_IDX]
S
Shuduo Sang 已提交
797

798 799
    def canCreateDb(self):
        return self._info[self.CAN_CREATE_DB]
S
Shuduo Sang 已提交
800

801 802
    def canDropDb(self):
        return self._info[self.CAN_DROP_DB]
S
Shuduo Sang 已提交
803

804 805
    def canCreateFixedSuperTable(self):
        return self._info[self.CAN_CREATE_FIXED_SUPER_TABLE]
S
Shuduo Sang 已提交
806

807 808
    def canDropFixedSuperTable(self):
        return self._info[self.CAN_DROP_FIXED_SUPER_TABLE]
S
Shuduo Sang 已提交
809

810 811
    def canAddData(self):
        return self._info[self.CAN_ADD_DATA]
S
Shuduo Sang 已提交
812

813 814 815 816 817
    def canReadData(self):
        return self._info[self.CAN_READ_DATA]

    def assertAtMostOneSuccess(self, tasks, cls):
        sCnt = 0
S
Shuduo Sang 已提交
818
        for task in tasks:
819 820 821
            if not isinstance(task, cls):
                continue
            if task.isSuccess():
S
Steven Li 已提交
822
                # task.logDebug("Task success found")
823
                sCnt += 1
S
Shuduo Sang 已提交
824 825 826
                if (sCnt >= 2):
                    raise RuntimeError(
                        "Unexpected more than 1 success with task: {}".format(cls))
827 828 829 830

    def assertIfExistThenSuccess(self, tasks, cls):
        sCnt = 0
        exists = False
S
Shuduo Sang 已提交
831
        for task in tasks:
832 833
            if not isinstance(task, cls):
                continue
S
Shuduo Sang 已提交
834
            exists = True  # we have a valid instance
835 836
            if task.isSuccess():
                sCnt += 1
S
Shuduo Sang 已提交
837 838 839
        if (exists and sCnt <= 0):
            raise RuntimeError(
                "Unexpected zero success for task: {}".format(cls))
840 841

    def assertNoTask(self, tasks, cls):
S
Shuduo Sang 已提交
842
        for task in tasks:
843
            if isinstance(task, cls):
S
Shuduo Sang 已提交
844 845
                raise CrashGenError(
                    "This task: {}, is not expected to be present, given the success/failure of others".format(cls.__name__))
846 847

    def assertNoSuccess(self, tasks, cls):
S
Shuduo Sang 已提交
848
        for task in tasks:
849 850
            if isinstance(task, cls):
                if task.isSuccess():
S
Shuduo Sang 已提交
851 852
                    raise RuntimeError(
                        "Unexpected successful task: {}".format(cls))
853 854

    def hasSuccess(self, tasks, cls):
S
Shuduo Sang 已提交
855
        for task in tasks:
856 857 858 859 860 861
            if not isinstance(task, cls):
                continue
            if task.isSuccess():
                return True
        return False

S
Steven Li 已提交
862
    def hasTask(self, tasks, cls):
S
Shuduo Sang 已提交
863
        for task in tasks:
S
Steven Li 已提交
864 865 866 867
            if isinstance(task, cls):
                return True
        return False

S
Shuduo Sang 已提交
868

869 870 871 872
class StateInvalid(AnyState):
    def getInfo(self):
        return [
            self.STATE_INVALID,
S
Shuduo Sang 已提交
873 874 875
            False, False,  # can create/drop Db
            False, False,  # can create/drop fixed table
            False, False,  # can insert/read data with fixed table
876 877 878 879
        ]

    # def verifyTasksToState(self, tasks, newState):

S
Shuduo Sang 已提交
880

881 882 883 884
class StateEmpty(AnyState):
    def getInfo(self):
        return [
            self.STATE_EMPTY,
S
Shuduo Sang 已提交
885 886 887
            True, False,  # can create/drop Db
            False, False,  # can create/drop fixed table
            False, False,  # can insert/read data with fixed table
888 889
        ]

S
Shuduo Sang 已提交
890 891
    def verifyTasksToState(self, tasks, newState):
        if (self.hasSuccess(tasks, TaskCreateDb)
S
Shuduo Sang 已提交
892
            ):  # at EMPTY, if there's succes in creating DB
S
Shuduo Sang 已提交
893 894 895 896
            if (not self.hasTask(tasks, TaskDropDb)):  # and no drop_db tasks
                # we must have at most one. TODO: compare numbers
                self.assertAtMostOneSuccess(tasks, TaskCreateDb)

897 898 899 900 901 902 903 904 905 906 907

class StateDbOnly(AnyState):
    def getInfo(self):
        return [
            self.STATE_DB_ONLY,
            False, True,
            True, False,
            False, False,
        ]

    def verifyTasksToState(self, tasks, newState):
S
Shuduo Sang 已提交
908 909 910
        if (not self.hasTask(tasks, TaskCreateDb)):
            # only if we don't create any more
            self.assertAtMostOneSuccess(tasks, TaskDropDb)
911
        self.assertIfExistThenSuccess(tasks, TaskDropDb)
S
Steven Li 已提交
912
        # self.assertAtMostOneSuccess(tasks, CreateFixedTableTask) # not true in massively parrallel cases
913
        # Nothing to be said about adding data task
914
        # if ( self.hasSuccess(tasks, DropDbTask) ): # dropped the DB
S
Shuduo Sang 已提交
915 916 917
        # self.assertHasTask(tasks, DropDbTask) # implied by hasSuccess
        # self.assertAtMostOneSuccess(tasks, DropDbTask)
        # self._state = self.STATE_EMPTY
918 919
        # if ( self.hasSuccess(tasks, TaskCreateSuperTable) ): # did not drop db, create table success
        #     # self.assertHasTask(tasks, CreateFixedTableTask) # tried to create table
S
Shuduo Sang 已提交
920
        #     if ( not self.hasTask(tasks, TaskDropSuperTable) ):
921
        #         self.assertAtMostOneSuccess(tasks, TaskCreateSuperTable) # at most 1 attempt is successful, if we don't drop anything
S
Shuduo Sang 已提交
922 923 924 925 926 927
        # self.assertNoTask(tasks, DropDbTask) # should have have tried
        # if ( not self.hasSuccess(tasks, AddFixedDataTask) ): # just created table, no data yet
        #     # can't say there's add-data attempts, since they may all fail
        #     self._state = self.STATE_TABLE_ONLY
        # else:
        #     self._state = self.STATE_HAS_DATA
928 929 930 931
        # What about AddFixedData?
        # elif ( self.hasSuccess(tasks, AddFixedDataTask) ):
        #     self._state = self.STATE_HAS_DATA
        # else: # no success in dropping db tasks, no success in create fixed table? read data should also fail
S
Shuduo Sang 已提交
932
        #     # raise RuntimeError("Unexpected no-success scenario")   # We might just landed all failure tasks,
933 934
        #     self._state = self.STATE_DB_ONLY  # no change

S
Shuduo Sang 已提交
935

936
class StateSuperTableOnly(AnyState):
937 938 939 940 941 942 943 944 945
    def getInfo(self):
        return [
            self.STATE_TABLE_ONLY,
            False, True,
            False, True,
            True, True,
        ]

    def verifyTasksToState(self, tasks, newState):
S
Shuduo Sang 已提交
946
        if (self.hasSuccess(tasks, TaskDropSuperTable)
S
Shuduo Sang 已提交
947
            ):  # we are able to drop the table
948
            #self.assertAtMostOneSuccess(tasks, TaskDropSuperTable)
S
Shuduo Sang 已提交
949 950
            # we must have had recreted it
            self.hasSuccess(tasks, TaskCreateSuperTable)
951

952
            # self._state = self.STATE_DB_ONLY
S
Steven Li 已提交
953 954
        # elif ( self.hasSuccess(tasks, AddFixedDataTask) ): # no success dropping the table, but added data
        #     self.assertNoTask(tasks, DropFixedTableTask) # not true in massively parrallel cases
955
            # self._state = self.STATE_HAS_DATA
S
Steven Li 已提交
956 957 958
        # elif ( self.hasSuccess(tasks, ReadFixedDataTask) ): # no success in prev cases, but was able to read data
            # self.assertNoTask(tasks, DropFixedTableTask)
            # self.assertNoTask(tasks, AddFixedDataTask)
959
            # self._state = self.STATE_TABLE_ONLY # no change
S
Steven Li 已提交
960 961 962
        # else: # did not drop table, did not insert data, did not read successfully, that is impossible
        #     raise RuntimeError("Unexpected no-success scenarios")
        # TODO: need to revamp!!
963

S
Shuduo Sang 已提交
964

965 966 967 968 969 970 971 972 973 974
class StateHasData(AnyState):
    def getInfo(self):
        return [
            self.STATE_HAS_DATA,
            False, True,
            False, True,
            True, True,
        ]

    def verifyTasksToState(self, tasks, newState):
S
Shuduo Sang 已提交
975
        if (newState.equals(AnyState.STATE_EMPTY)):
976
            self.hasSuccess(tasks, TaskDropDb)
S
Shuduo Sang 已提交
977 978 979 980
            if (not self.hasTask(tasks, TaskCreateDb)):
                self.assertAtMostOneSuccess(tasks, TaskDropDb)  # TODO: dicy
        elif (newState.equals(AnyState.STATE_DB_ONLY)):  # in DB only
            if (not self.hasTask(tasks, TaskCreateDb)
S
Shuduo Sang 已提交
981
                    ):  # without a create_db task
S
Shuduo Sang 已提交
982 983
                # we must have drop_db task
                self.assertNoTask(tasks, TaskDropDb)
984
            self.hasSuccess(tasks, TaskDropSuperTable)
985
            # self.assertAtMostOneSuccess(tasks, DropFixedSuperTableTask) # TODO: dicy
986 987 988 989
        # elif ( newState.equals(AnyState.STATE_TABLE_ONLY) ): # data deleted
            # self.assertNoTask(tasks, TaskDropDb)
            # self.assertNoTask(tasks, TaskDropSuperTable)
            # self.assertNoTask(tasks, TaskAddData)
S
Steven Li 已提交
990
            # self.hasSuccess(tasks, DeleteDataTasks)
S
Shuduo Sang 已提交
991 992
        else:  # should be STATE_HAS_DATA
            if (not self.hasTask(tasks, TaskCreateDb)
S
Shuduo Sang 已提交
993
                    ):  # only if we didn't create one
S
Shuduo Sang 已提交
994 995 996
                # we shouldn't have dropped it
                self.assertNoTask(tasks, TaskDropDb)
            if (not self.hasTask(tasks, TaskCreateSuperTable)
S
Shuduo Sang 已提交
997
                ):  # if we didn't create the table
S
Shuduo Sang 已提交
998 999
                # we should not have a task that drops it
                self.assertNoTask(tasks, TaskDropSuperTable)
1000
            # self.assertIfExistThenSuccess(tasks, ReadFixedDataTask)
S
Steven Li 已提交
1001

S
Shuduo Sang 已提交
1002

1003
class StateMechine:
1004 1005
    def __init__(self, dbConn):
        self._dbConn = dbConn
S
Shuduo Sang 已提交
1006 1007 1008 1009 1010
        self._curState = self._findCurrentState()  # starting state
        # transitition target probabilities, indexed with value of STATE_EMPTY,
        # STATE_DB_ONLY, etc.
        self._stateWeights = [1, 3, 5, 15]

1011 1012 1013
    def getCurrentState(self):
        return self._curState

1014 1015 1016
    def hasDatabase(self):
        return self._curState.canDropDb()  # ha, can drop DB means it has one

1017
    # May be slow, use cautionsly...
S
Shuduo Sang 已提交
1018
    def getTaskTypes(self):  # those that can run (directly/indirectly) from the current state
1019 1020 1021 1022 1023 1024
        def typesToStrings(types):
            ss = []
            for t in types:
                ss.append(t.__name__)
            return ss

S
Shuduo Sang 已提交
1025
        allTaskClasses = StateTransitionTask.__subclasses__()  # all state transition tasks
1026 1027
        firstTaskTypes = []
        for tc in allTaskClasses:
S
Shuduo Sang 已提交
1028
            # t = tc(self) # create task object
1029 1030
            if tc.canBeginFrom(self._curState):
                firstTaskTypes.append(tc)
S
Shuduo Sang 已提交
1031 1032 1033 1034 1035 1036 1037 1038
        # now we have all the tasks that can begin directly from the current
        # state, let's figure out the INDIRECT ones
        taskTypes = firstTaskTypes.copy()  # have to have these
        for task1 in firstTaskTypes:  # each task type gathered so far
            endState = task1.getEndState()  # figure the end state
            if endState is None:  # does not change end state
                continue  # no use, do nothing
            for tc in allTaskClasses:  # what task can further begin from there?
1039
                if tc.canBeginFrom(endState) and (tc not in firstTaskTypes):
S
Shuduo Sang 已提交
1040
                    taskTypes.append(tc)  # gather it
1041 1042

        if len(taskTypes) <= 0:
S
Shuduo Sang 已提交
1043 1044 1045 1046 1047 1048 1049
            raise RuntimeError(
                "No suitable task types found for state: {}".format(
                    self._curState))
        logger.debug(
            "[OPS] Tasks found for state {}: {}".format(
                self._curState,
                typesToStrings(taskTypes)))
1050 1051 1052 1053
        return taskTypes

    def _findCurrentState(self):
        dbc = self._dbConn
S
Shuduo Sang 已提交
1054 1055
        ts = time.time()  # we use this to debug how fast/slow it is to do the various queries to find the current DB state
        if dbc.query("show databases") == 0:  # no database?!
1056
            # logger.debug("Found EMPTY state")
S
Shuduo Sang 已提交
1057 1058 1059
            logger.debug(
                "[STT] empty database found, between {} and {}".format(
                    ts, time.time()))
1060
            return StateEmpty()
S
Shuduo Sang 已提交
1061 1062 1063 1064
        # did not do this when openning connection, and this is NOT the worker
        # thread, which does this on their own
        dbc.execute("use db")
        if dbc.query("show tables") == 0:  # no tables
1065
            # logger.debug("Found DB ONLY state")
S
Shuduo Sang 已提交
1066 1067 1068
            logger.debug(
                "[STT] DB_ONLY found, between {} and {}".format(
                    ts, time.time()))
1069
            return StateDbOnly()
S
Shuduo Sang 已提交
1070 1071
        if dbc.query("SELECT * FROM db.{}".format(DbManager.getFixedSuperTableName())
                     ) == 0:  # no regular tables
1072
            # logger.debug("Found TABLE_ONLY state")
S
Shuduo Sang 已提交
1073 1074 1075
            logger.debug(
                "[STT] SUPER_TABLE_ONLY found, between {} and {}".format(
                    ts, time.time()))
1076
            return StateSuperTableOnly()
S
Shuduo Sang 已提交
1077
        else:  # has actual tables
1078
            # logger.debug("Found HAS_DATA state")
S
Shuduo Sang 已提交
1079 1080 1081
            logger.debug(
                "[STT] HAS_DATA found, between {} and {}".format(
                    ts, time.time()))
1082 1083 1084
            return StateHasData()

    def transition(self, tasks):
S
Shuduo Sang 已提交
1085
        if (len(tasks) == 0):  # before 1st step, or otherwise empty
1086
            logger.debug("[STT] Starting State: {}".format(self._curState))
S
Shuduo Sang 已提交
1087
            return  # do nothing
1088

S
Shuduo Sang 已提交
1089 1090
        # this should show up in the server log, separating steps
        self._dbConn.execute("show dnodes")
1091 1092 1093 1094

        # Generic Checks, first based on the start state
        if self._curState.canCreateDb():
            self._curState.assertIfExistThenSuccess(tasks, TaskCreateDb)
S
Shuduo Sang 已提交
1095 1096
            # self.assertAtMostOneSuccess(tasks, CreateDbTask) # not really, in
            # case of multiple creation and drops
1097 1098 1099

        if self._curState.canDropDb():
            self._curState.assertIfExistThenSuccess(tasks, TaskDropDb)
S
Shuduo Sang 已提交
1100 1101
            # self.assertAtMostOneSuccess(tasks, DropDbTask) # not really in
            # case of drop-create-drop
1102 1103 1104

        # if self._state.canCreateFixedTable():
            # self.assertIfExistThenSuccess(tasks, CreateFixedTableTask) # Not true, DB may be dropped
S
Shuduo Sang 已提交
1105 1106
            # self.assertAtMostOneSuccess(tasks, CreateFixedTableTask) # not
            # really, in case of create-drop-create
1107 1108 1109

        # if self._state.canDropFixedTable():
            # self.assertIfExistThenSuccess(tasks, DropFixedTableTask) # Not True, the whole DB may be dropped
S
Shuduo Sang 已提交
1110 1111
            # self.assertAtMostOneSuccess(tasks, DropFixedTableTask) # not
            # really in case of drop-create-drop
1112 1113

        # if self._state.canAddData():
S
Shuduo Sang 已提交
1114 1115
        # self.assertIfExistThenSuccess(tasks, AddFixedDataTask)  # not true
        # actually
1116 1117 1118 1119 1120 1121

        # if self._state.canReadData():
            # Nothing for sure

        newState = self._findCurrentState()
        logger.debug("[STT] New DB state determined: {}".format(newState))
S
Shuduo Sang 已提交
1122 1123
        # can old state move to new state through the tasks?
        self._curState.verifyTasksToState(tasks, newState)
1124 1125 1126
        self._curState = newState

    def pickTaskType(self):
S
Shuduo Sang 已提交
1127 1128
        # all the task types we can choose from at curent state
        taskTypes = self.getTaskTypes()
1129 1130 1131
        weights = []
        for tt in taskTypes:
            endState = tt.getEndState()
S
Shuduo Sang 已提交
1132 1133 1134
            if endState is not None:
                # TODO: change to a method
                weights.append(self._stateWeights[endState.getValIndex()])
1135
            else:
S
Shuduo Sang 已提交
1136 1137
                # read data task, default to 10: TODO: change to a constant
                weights.append(10)
1138
        i = self._weighted_choice_sub(weights)
S
Shuduo Sang 已提交
1139
        # logger.debug(" (weighted random:{}/{}) ".format(i, len(taskTypes)))
1140 1141
        return taskTypes[i]

S
Shuduo Sang 已提交
1142 1143 1144 1145 1146
    # ref:
    # https://eli.thegreenplace.net/2010/01/22/weighted-random-generation-in-python/
    def _weighted_choice_sub(self, weights):
        # TODO: use our dice to ensure it being determinstic?
        rnd = random.random() * sum(weights)
1147 1148 1149 1150
        for i, w in enumerate(weights):
            rnd -= w
            if rnd < 0:
                return i
1151

1152
# Manager of the Database Data/Connection
S
Shuduo Sang 已提交
1153 1154 1155 1156


class DbManager():
    def __init__(self, resetDb=True):
S
Steven Li 已提交
1157
        self.tableNumQueue = LinearQueue()
S
Shuduo Sang 已提交
1158 1159 1160
        # datetime.datetime(2019, 1, 1) # initial date time tick
        self._lastTick = self.setupLastTick()
        self._lastInt = 0  # next one is initial integer
1161
        self._lock = threading.RLock()
S
Shuduo Sang 已提交
1162

1163
        # self.openDbServerConnection()
S
Shuduo Sang 已提交
1164 1165
        self._dbConn = DbConn.createNative() if (
            gConfig.connector_type == 'native') else DbConn.createRest()
1166
        try:
S
Shuduo Sang 已提交
1167
            self._dbConn.open()  # may throw taos.error.ProgrammingError: disconnected
1168 1169
        except taos.error.ProgrammingError as err:
            # print("Error type: {}, msg: {}, value: {}".format(type(err), err.msg, err))
S
Shuduo Sang 已提交
1170 1171 1172
            if (err.msg == 'client disconnected'):  # cannot open DB connection
                print(
                    "Cannot establish DB connection, please re-run script without parameter, and follow the instructions.")
S
Steven Li 已提交
1173
                sys.exit(2)
1174
            else:
S
Shuduo Sang 已提交
1175 1176
                raise
        except BaseException:
S
Steven Li 已提交
1177
            print("[=] Unexpected exception")
S
Shuduo Sang 已提交
1178 1179 1180 1181
            raise

        if resetDb:
            self._dbConn.resetDb()  # drop and recreate DB
1182

S
Shuduo Sang 已提交
1183 1184
        # Do this after dbConn is in proper shape
        self._stateMachine = StateMechine(self._dbConn)
1185

1186 1187 1188
    def getDbConn(self):
        return self._dbConn

S
Shuduo Sang 已提交
1189
    def getStateMachine(self) -> StateMechine:
1190 1191 1192 1193
        return self._stateMachine

    # def getState(self):
    #     return self._stateMachine.getCurrentState()
1194 1195 1196 1197 1198 1199

    # We aim to create a starting time tick, such that, whenever we run our test here once
    # We should be able to safely create 100,000 records, which will not have any repeated time stamp
    # when we re-run the test in 3 minutes (180 seconds), basically we should expand time duration
    # by a factor of 500.
    # TODO: what if it goes beyond 10 years into the future
1200
    # TODO: fix the error as result of above: "tsdb timestamp is out of range"
1201
    def setupLastTick(self):
1202
        t1 = datetime.datetime(2020, 6, 1)
1203
        t2 = datetime.datetime.now()
S
Shuduo Sang 已提交
1204 1205 1206 1207
        # maybe a very large number, takes 69 years to exceed Python int range
        elSec = int(t2.timestamp() - t1.timestamp())
        elSec2 = (elSec % (8 * 12 * 30 * 24 * 60 * 60 / 500)) * \
            500  # a number representing seconds within 10 years
1208
        # print("elSec = {}".format(elSec))
S
Shuduo Sang 已提交
1209 1210 1211
        t3 = datetime.datetime(2012, 1, 1)  # default "keep" is 10 years
        t4 = datetime.datetime.fromtimestamp(
            t3.timestamp() + elSec2)  # see explanation above
1212 1213 1214
        logger.info("Setting up TICKS to start from: {}".format(t4))
        return t4

S
Shuduo Sang 已提交
1215
    def pickAndAllocateTable(self):  # pick any table, and "use" it
S
Steven Li 已提交
1216 1217
        return self.tableNumQueue.pickAndAllocate()

1218 1219 1220 1221 1222
    def addTable(self):
        with self._lock:
            tIndex = self.tableNumQueue.push()
        return tIndex

1223 1224
    @classmethod
    def getFixedSuperTableName(cls):
1225
        return "fs_table"
1226

S
Shuduo Sang 已提交
1227
    def releaseTable(self, i):  # return the table back, so others can use it
S
Steven Li 已提交
1228 1229
        self.tableNumQueue.release(i)

1230
    def getNextTick(self):
S
Shuduo Sang 已提交
1231 1232
        with self._lock:  # prevent duplicate tick
            if Dice.throw(10) == 0:  # 1 in 10 chance
S
Steven Li 已提交
1233
                return self._lastTick + datetime.timedelta(0, -100)
S
Shuduo Sang 已提交
1234 1235 1236
            else:  # regular
                # add one second to it
                self._lastTick += datetime.timedelta(0, 1)
S
Steven Li 已提交
1237
                return self._lastTick
1238 1239

    def getNextInt(self):
1240 1241 1242
        with self._lock:
            self._lastInt += 1
            return self._lastInt
1243 1244

    def getNextBinary(self):
S
Shuduo Sang 已提交
1245 1246
        return "Beijing_Shanghai_Los_Angeles_New_York_San_Francisco_Chicago_Beijing_Shanghai_Los_Angeles_New_York_San_Francisco_Chicago_{}".format(
            self.getNextInt())
1247 1248 1249

    def getNextFloat(self):
        return 0.9 + self.getNextInt()
S
Shuduo Sang 已提交
1250

S
Steven Li 已提交
1251
    def getTableNameToDelete(self):
S
Shuduo Sang 已提交
1252 1253
        tblNum = self.tableNumQueue.pop()  # TODO: race condition!
        if (not tblNum):  # maybe false
1254
            return False
S
Shuduo Sang 已提交
1255

S
Steven Li 已提交
1256 1257
        return "table_{}".format(tblNum)

1258
    def cleanUp(self):
S
Shuduo Sang 已提交
1259 1260
        self._dbConn.close()

1261

1262
class TaskExecutor():
1263
    class BoundedList:
S
Shuduo Sang 已提交
1264
        def __init__(self, size=10):
1265 1266 1267
            self._size = size
            self._list = []

S
Shuduo Sang 已提交
1268 1269
        def add(self, n: int):
            if not self._list:  # empty
1270 1271 1272 1273 1274 1275 1276
                self._list.append(n)
                return
            # now we should insert
            nItems = len(self._list)
            insPos = 0
            for i in range(nItems):
                insPos = i
S
Shuduo Sang 已提交
1277 1278 1279
                if n <= self._list[i]:  # smaller than this item, time to insert
                    break  # found the insertion point
                insPos += 1  # insert to the right
1280

S
Shuduo Sang 已提交
1281 1282
            if insPos == 0:  # except for the 1st item, # TODO: elimiate first item as gating item
                return  # do nothing
1283 1284

            # print("Inserting at postion {}, value: {}".format(insPos, n))
S
Shuduo Sang 已提交
1285 1286
            self._list.insert(insPos, n)  # insert

1287
            newLen = len(self._list)
S
Shuduo Sang 已提交
1288 1289 1290 1291 1292
            if newLen <= self._size:
                return  # do nothing
            elif newLen == (self._size + 1):
                del self._list[0]  # remove the first item
            else:
1293 1294 1295 1296 1297 1298 1299
                raise RuntimeError("Corrupt Bounded List")

        def __str__(self):
            return repr(self._list)

    _boundedList = BoundedList()

1300 1301 1302
    def __init__(self, curStep):
        self._curStep = curStep

1303 1304 1305 1306
    @classmethod
    def getBoundedList(cls):
        return cls._boundedList

1307 1308 1309
    def getCurStep(self):
        return self._curStep

S
Shuduo Sang 已提交
1310
    def execute(self, task: Task, wt: WorkerThread):  # execute a task on a thread
1311
        task.execute(wt)
1312

1313 1314 1315 1316
    def recordDataMark(self, n: int):
        # print("[{}]".format(n), end="", flush=True)
        self._boundedList.add(n)

1317 1318
    # def logInfo(self, msg):
    #     logger.info("    T[{}.x]: ".format(self._curStep) + msg)
1319

1320 1321
    # def logDebug(self, msg):
    #     logger.debug("    T[{}.x]: ".format(self._curStep) + msg)
1322

S
Shuduo Sang 已提交
1323

S
Steven Li 已提交
1324
class Task():
1325 1326 1327 1328
    taskSn = 100

    @classmethod
    def allocTaskNum(cls):
S
Shuduo Sang 已提交
1329
        Task.taskSn += 1  # IMPORTANT: cannot use cls.taskSn, since each sub class will have a copy
S
Steven Li 已提交
1330 1331
        # logger.debug("Allocating taskSN: {}".format(Task.taskSn))
        return Task.taskSn
1332

S
Shuduo Sang 已提交
1333
    def __init__(self, dbManager: DbManager, execStats: ExecutionStats):
1334
        self._dbManager = dbManager
S
Shuduo Sang 已提交
1335
        self._workerThread = None
1336
        self._err = None
1337
        self._aborted = False
1338
        self._curStep = None
S
Shuduo Sang 已提交
1339
        self._numRows = None  # Number of rows affected
1340

S
Shuduo Sang 已提交
1341
        # Assign an incremental task serial number
1342
        self._taskNum = self.allocTaskNum()
S
Steven Li 已提交
1343
        # logger.debug("Creating new task {}...".format(self._taskNum))
1344

1345
        self._execStats = execStats
S
Shuduo Sang 已提交
1346
        self._lastSql = ""  # last SQL executed/attempted
1347

1348
    def isSuccess(self):
S
Shuduo Sang 已提交
1349
        return self._err is None
1350

1351 1352 1353
    def isAborted(self):
        return self._aborted

S
Shuduo Sang 已提交
1354
    def clone(self):  # TODO: why do we need this again?
1355
        newTask = self.__class__(self._dbManager, self._execStats)
1356 1357 1358
        return newTask

    def logDebug(self, msg):
S
Shuduo Sang 已提交
1359 1360 1361
        self._workerThread.logDebug(
            "Step[{}.{}] {}".format(
                self._curStep, self._taskNum, msg))
1362 1363

    def logInfo(self, msg):
S
Shuduo Sang 已提交
1364 1365 1366
        self._workerThread.logInfo(
            "Step[{}.{}] {}".format(
                self._curStep, self._taskNum, msg))
1367

1368
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1369 1370 1371
        raise RuntimeError(
            "To be implemeted by child classes, class name: {}".format(
                self.__class__.__name__))
1372

1373 1374
    def execute(self, wt: WorkerThread):
        wt.verifyThreadSelf()
S
Shuduo Sang 已提交
1375
        self._workerThread = wt  # type: ignore
1376 1377

        te = wt.getTaskExecutor()
1378
        self._curStep = te.getCurStep()
S
Shuduo Sang 已提交
1379 1380
        self.logDebug(
            "[-] executing task {}...".format(self.__class__.__name__))
1381 1382

        self._err = None
S
Shuduo Sang 已提交
1383 1384
        self._execStats.beginTaskType(
            self.__class__.__name__)  # mark beginning
1385
        try:
S
Shuduo Sang 已提交
1386
            self._executeInternal(te, wt)  # TODO: no return value?
1387
        except taos.error.ProgrammingError as err:
S
Shuduo Sang 已提交
1388 1389 1390 1391 1392 1393
            errno2 = err.errno if (
                err.errno > 0) else 0x80000000 + err.errno  # correct error scheme
            if (errno2 in [
                0x05,  # TSDB_CODE_RPC_NOT_READY
                0x200, 0x360, 0x362, 0x36A, 0x36B, 0x36D, 0x381, 0x380, 0x383, 0x503,
                0x510,  # vnode not in ready state
1394
                0x600,
S
Shuduo Sang 已提交
1395 1396 1397 1398 1399
                1000  # REST catch-all error
            ]):  # allowed errors
                self.logDebug(
                    "[=] Acceptable Taos library exception: errno=0x{:X}, msg: {}, SQL: {}".format(
                        errno2, err, self._lastSql))
1400
                print("_", end="", flush=True)
S
Shuduo Sang 已提交
1401
                self._err = err
1402
            else:
S
Shuduo Sang 已提交
1403 1404
                errMsg = "[=] Unexpected Taos library exception: errno=0x{:X}, msg: {}, SQL: {}".format(
                    errno2, err, self._lastSql)
1405
                self.logDebug(errMsg)
S
Shuduo Sang 已提交
1406 1407 1408 1409 1410
                if gConfig.debug:
                    raise  # so that we see full stack
                else:  # non-debug
                    print(
                        "\n\n----------------------------\nProgram ABORTED Due to Unexpected TAOS Error: \n\n{}\n".format(errMsg) +
1411
                        "----------------------------\n")
1412 1413 1414
                    # sys.exit(-1)
                    self._err = err
                    self._aborted = True
S
Shuduo Sang 已提交
1415
        except Exception as e:
S
Steven Li 已提交
1416
            self.logInfo("Non-TAOS exception encountered")
S
Shuduo Sang 已提交
1417
            self._err = e
S
Steven Li 已提交
1418 1419
            self._aborted = True
            traceback.print_exc()
S
Shuduo Sang 已提交
1420 1421 1422 1423
        except BaseException:
            self.logDebug(
                "[=] Unexpected exception, SQL: {}".format(
                    self._lastSql))
1424
            raise
1425
        self._execStats.endTaskType(self.__class__.__name__, self.isSuccess())
S
Shuduo Sang 已提交
1426 1427 1428 1429 1430

        self.logDebug("[X] task execution completed, {}, status: {}".format(
            self.__class__.__name__, "Success" if self.isSuccess() else "Failure"))
        # TODO: merge with above.
        self._execStats.incExecCount(self.__class__.__name__, self.isSuccess())
S
Steven Li 已提交
1431

1432
    def execSql(self, sql):
1433
        self._lastSql = sql
1434
        return self._dbManager.execute(sql)
1435

S
Shuduo Sang 已提交
1436
    def execWtSql(self, wt: WorkerThread, sql):  # execute an SQL on the worker thread
1437 1438 1439
        self._lastSql = sql
        return wt.execSql(sql)

S
Shuduo Sang 已提交
1440
    def queryWtSql(self, wt: WorkerThread, sql):  # execute an SQL on the worker thread
1441 1442 1443
        self._lastSql = sql
        return wt.querySql(sql)

S
Shuduo Sang 已提交
1444
    def getQueryResult(self, wt: WorkerThread):  # execute an SQL on the worker thread
1445 1446 1447
        return wt.getQueryResult()


1448
class ExecutionStats:
1449
    def __init__(self):
S
Shuduo Sang 已提交
1450 1451
        # total/success times for a task
        self._execTimes: Dict[str, [int, int]] = {}
1452 1453 1454
        self._tasksInProgress = 0
        self._lock = threading.Lock()
        self._firstTaskStartTime = None
1455
        self._execStartTime = None
S
Shuduo Sang 已提交
1456 1457
        self._elapsedTime = 0.0  # total elapsed time
        self._accRunTime = 0.0  # accumulated run time
1458

1459 1460 1461
        self._failed = False
        self._failureReason = None

S
Steven Li 已提交
1462
    def __str__(self):
S
Shuduo Sang 已提交
1463 1464
        return "[ExecStats: _failed={}, _failureReason={}".format(
            self._failed, self._failureReason)
S
Steven Li 已提交
1465 1466

    def isFailed(self):
S
Shuduo Sang 已提交
1467
        return self._failed
S
Steven Li 已提交
1468

1469 1470 1471 1472 1473 1474
    def startExec(self):
        self._execStartTime = time.time()

    def endExec(self):
        self._elapsedTime = time.time() - self._execStartTime

S
Shuduo Sang 已提交
1475
    def incExecCount(self, klassName, isSuccess):  # TODO: add a lock here
1476 1477
        if klassName not in self._execTimes:
            self._execTimes[klassName] = [0, 0]
S
Shuduo Sang 已提交
1478 1479
        t = self._execTimes[klassName]  # tuple for the data
        t[0] += 1  # index 0 has the "total" execution times
1480
        if isSuccess:
S
Shuduo Sang 已提交
1481
            t[1] += 1  # index 1 has the "success" execution times
1482 1483 1484

    def beginTaskType(self, klassName):
        with self._lock:
S
Shuduo Sang 已提交
1485 1486
            if self._tasksInProgress == 0:  # starting a new round
                self._firstTaskStartTime = time.time()  # I am now the first task
1487 1488 1489 1490 1491
            self._tasksInProgress += 1

    def endTaskType(self, klassName, isSuccess):
        with self._lock:
            self._tasksInProgress -= 1
S
Shuduo Sang 已提交
1492
            if self._tasksInProgress == 0:  # all tasks have stopped
1493 1494 1495
                self._accRunTime += (time.time() - self._firstTaskStartTime)
                self._firstTaskStartTime = None

1496 1497 1498 1499
    def registerFailure(self, reason):
        self._failed = True
        self._failureReason = reason

1500
    def printStats(self):
S
Shuduo Sang 已提交
1501 1502 1503 1504 1505 1506
        logger.info(
            "----------------------------------------------------------------------")
        logger.info(
            "| Crash_Gen test {}, with the following stats:". format(
                "FAILED (reason: {})".format(
                    self._failureReason) if self._failed else "SUCCEEDED"))
1507
        logger.info("| Task Execution Times (success/total):")
1508
        execTimesAny = 0
S
Shuduo Sang 已提交
1509
        for k, n in self._execTimes.items():
1510
            execTimesAny += n[0]
S
Shuduo Sang 已提交
1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530
            logger.info("|    {0:<24}: {1}/{2}".format(k, n[1], n[0]))

        logger.info(
            "| Total Tasks Executed (success or not): {} ".format(execTimesAny))
        logger.info(
            "| Total Tasks In Progress at End: {}".format(
                self._tasksInProgress))
        logger.info(
            "| Total Task Busy Time (elapsed time when any task is in progress): {:.3f} seconds".format(
                self._accRunTime))
        logger.info(
            "| Average Per-Task Execution Time: {:.3f} seconds".format(self._accRunTime / execTimesAny))
        logger.info(
            "| Total Elapsed Time (from wall clock): {:.3f} seconds".format(
                self._elapsedTime))
        logger.info(
            "| Top numbers written: {}".format(
                TaskExecutor.getBoundedList()))
        logger.info(
            "----------------------------------------------------------------------")
1531 1532 1533


class StateTransitionTask(Task):
1534 1535 1536 1537 1538
    LARGE_NUMBER_OF_TABLES = 35
    SMALL_NUMBER_OF_TABLES = 3
    LARGE_NUMBER_OF_RECORDS = 50
    SMALL_NUMBER_OF_RECORDS = 3

1539
    @classmethod
S
Shuduo Sang 已提交
1540
    def getInfo(cls):  # each sub class should supply their own information
1541 1542
        raise RuntimeError("Overriding method expected")

S
Shuduo Sang 已提交
1543
    _endState = None
1544
    @classmethod
S
Shuduo Sang 已提交
1545
    def getEndState(cls):  # TODO: optimize by calling it fewer times
1546 1547
        raise RuntimeError("Overriding method expected")

1548 1549 1550
    # @classmethod
    # def getBeginStates(cls):
    #     return cls.getInfo()[0]
1551

1552 1553 1554
    # @classmethod
    # def getEndState(cls): # returning the class name
    #     return cls.getInfo()[0]
1555 1556

    @classmethod
1557 1558 1559
    def canBeginFrom(cls, state: AnyState):
        # return state.getValue() in cls.getBeginStates()
        raise RuntimeError("must be overriden")
1560

1561 1562 1563 1564
    @classmethod
    def getRegTableName(cls, i):
        return "db.reg_table_{}".format(i)

1565 1566
    def execute(self, wt: WorkerThread):
        super().execute(wt)
S
Shuduo Sang 已提交
1567 1568


1569
class TaskCreateDb(StateTransitionTask):
1570
    @classmethod
1571
    def getEndState(cls):
S
Shuduo Sang 已提交
1572
        return StateDbOnly()
1573

1574 1575 1576 1577
    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canCreateDb()

1578
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1579 1580
        self.execWtSql(wt, "create database db")

1581

1582
class TaskDropDb(StateTransitionTask):
1583
    @classmethod
1584 1585
    def getEndState(cls):
        return StateEmpty()
1586

1587 1588 1589 1590
    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canDropDb()

1591
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1592
        self.execWtSql(wt, "drop database db")
S
Steven Li 已提交
1593
        logger.debug("[OPS] database dropped at {}".format(time.time()))
1594

S
Shuduo Sang 已提交
1595

1596
class TaskCreateSuperTable(StateTransitionTask):
1597
    @classmethod
1598 1599
    def getEndState(cls):
        return StateSuperTableOnly()
1600

1601 1602
    @classmethod
    def canBeginFrom(cls, state: AnyState):
1603
        return state.canCreateFixedSuperTable()
1604

1605
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1606
        if not wt.dbInUse():  # no DB yet, to the best of our knowledge
1607 1608 1609
            logger.debug("Skipping task, no DB yet")
            return

S
Shuduo Sang 已提交
1610
        tblName = self._dbManager.getFixedSuperTableName()
1611
        # wt.execSql("use db")    # should always be in place
S
Shuduo Sang 已提交
1612 1613 1614 1615 1616
        self.execWtSql(
            wt,
            "create table db.{} (ts timestamp, speed int) tags (b binary(200), f float) ".format(tblName))
        # No need to create the regular tables, INSERT will do that
        # automatically
1617

S
Steven Li 已提交
1618

1619
class TaskReadData(StateTransitionTask):
1620
    @classmethod
1621
    def getEndState(cls):
S
Shuduo Sang 已提交
1622
        return None  # meaning doesn't affect state
1623

1624 1625 1626 1627
    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canReadData()

1628
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1629 1630 1631
        sTbName = self._dbManager.getFixedSuperTableName()
        self.queryWtSql(wt, "select TBNAME from db.{}".format(
            sTbName))  # TODO: analyze result set later
1632

S
Shuduo Sang 已提交
1633 1634
        if random.randrange(
                5) == 0:  # 1 in 5 chance, simulate a broken connection. TODO: break connection in all situations
1635 1636
            wt.getDbConn().close()
            wt.getDbConn().open()
1637
        else:
S
Shuduo Sang 已提交
1638 1639
            # wt.getDbConn().getQueryResult()
            rTables = self.getQueryResult(wt)
1640
            # print("rTables[0] = {}, type = {}".format(rTables[0], type(rTables[0])))
S
Shuduo Sang 已提交
1641
            for rTbName in rTables:  # regular tables
1642
                self.execWtSql(wt, "select * from db.{}".format(rTbName[0]))
1643

1644 1645
        # tdSql.query(" cars where tbname in ('carzero', 'carone')")

S
Shuduo Sang 已提交
1646

1647
class TaskDropSuperTable(StateTransitionTask):
1648
    @classmethod
1649
    def getEndState(cls):
S
Shuduo Sang 已提交
1650
        return StateDbOnly()
1651

1652 1653
    @classmethod
    def canBeginFrom(cls, state: AnyState):
1654
        return state.canDropFixedSuperTable()
1655

1656
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1657 1658 1659 1660 1661 1662 1663
        # 1/2 chance, we'll drop the regular tables one by one, in a randomized
        # sequence
        if Dice.throw(2) == 0:
            tblSeq = list(range(
                2 + (self.LARGE_NUMBER_OF_TABLES if gConfig.larger_data else self.SMALL_NUMBER_OF_TABLES)))
            random.shuffle(tblSeq)
            tickOutput = False  # if we have spitted out a "d" character for "drop regular table"
1664
            isSuccess = True
S
Shuduo Sang 已提交
1665 1666 1667
            for i in tblSeq:
                regTableName = self.getRegTableName(
                    i)  # "db.reg_table_{}".format(i)
1668
                try:
S
Shuduo Sang 已提交
1669 1670 1671 1672 1673 1674 1675
                    self.execWtSql(wt, "drop table {}".format(
                        regTableName))  # nRows always 0, like MySQL
                except taos.error.ProgrammingError as err:
                    # correcting for strange error number scheme
                    errno2 = err.errno if (
                        err.errno > 0) else 0x80000000 + err.errno
                    if (errno2 in [0x362]):  # mnode invalid table name
1676
                        isSuccess = False
S
Shuduo Sang 已提交
1677 1678 1679
                        logger.debug(
                            "[DB] Acceptable error when dropping a table")
                    continue  # try to delete next regular table
1680 1681

                if (not tickOutput):
S
Shuduo Sang 已提交
1682 1683
                    tickOutput = True  # Print only one time
                    if isSuccess:
1684 1685
                        print("d", end="", flush=True)
                    else:
S
Shuduo Sang 已提交
1686
                        print("f", end="", flush=True)
1687 1688

        # Drop the super table itself
S
Shuduo Sang 已提交
1689
        tblName = self._dbManager.getFixedSuperTableName()
1690
        self.execWtSql(wt, "drop table db.{}".format(tblName))
1691

S
Shuduo Sang 已提交
1692

1693 1694 1695
class TaskAlterTags(StateTransitionTask):
    @classmethod
    def getEndState(cls):
S
Shuduo Sang 已提交
1696
        return None  # meaning doesn't affect state
1697 1698 1699

    @classmethod
    def canBeginFrom(cls, state: AnyState):
S
Shuduo Sang 已提交
1700
        return state.canDropFixedSuperTable()  # if we can drop it, we can alter tags
1701 1702

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
S
Shuduo Sang 已提交
1703
        tblName = self._dbManager.getFixedSuperTableName()
1704
        dice = Dice.throw(4)
S
Shuduo Sang 已提交
1705
        if dice == 0:
1706
            sql = "alter table db.{} add tag extraTag int".format(tblName)
S
Shuduo Sang 已提交
1707
        elif dice == 1:
1708
            sql = "alter table db.{} drop tag extraTag".format(tblName)
S
Shuduo Sang 已提交
1709
        elif dice == 2:
1710
            sql = "alter table db.{} drop tag newTag".format(tblName)
S
Shuduo Sang 已提交
1711 1712 1713
        else:  # dice == 3
            sql = "alter table db.{} change tag extraTag newTag".format(
                tblName)
1714 1715

        self.execWtSql(wt, sql)
1716

S
Shuduo Sang 已提交
1717

1718
class TaskAddData(StateTransitionTask):
S
Shuduo Sang 已提交
1719 1720
    # Track which table is being actively worked on
    activeTable: Set[int] = set()
1721

S
Shuduo Sang 已提交
1722 1723
    # We use these two files to record operations to DB, useful for power-off
    # tests
1724 1725 1726 1727 1728
    fAddLogReady = None
    fAddLogDone = None

    @classmethod
    def prepToRecordOps(cls):
S
Shuduo Sang 已提交
1729 1730 1731 1732
        if gConfig.record_ops:
            if (cls.fAddLogReady is None):
                logger.info(
                    "Recording in a file operations to be performed...")
1733
                cls.fAddLogReady = open("add_log_ready.txt", "w")
S
Shuduo Sang 已提交
1734
            if (cls.fAddLogDone is None):
1735 1736
                logger.info("Recording in a file operations completed...")
                cls.fAddLogDone = open("add_log_done.txt", "w")
1737

1738
    @classmethod
1739 1740
    def getEndState(cls):
        return StateHasData()
1741 1742 1743 1744

    @classmethod
    def canBeginFrom(cls, state: AnyState):
        return state.canAddData()
S
Shuduo Sang 已提交
1745

1746
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
1747
        ds = self._dbManager
S
Shuduo Sang 已提交
1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758
        # wt.execSql("use db") # TODO: seems to be an INSERT bug to require
        # this
        tblSeq = list(
            range(
                self.LARGE_NUMBER_OF_TABLES if gConfig.larger_data else self.SMALL_NUMBER_OF_TABLES))
        random.shuffle(tblSeq)
        for i in tblSeq:
            if (i in self.activeTable):  # wow already active
                # logger.info("Concurrent data insertion into table: {}".format(i))
                # print("ct({})".format(i), end="", flush=True) # Concurrent
                # insertion into table
1759 1760
                print("x", end="", flush=True)
            else:
S
Shuduo Sang 已提交
1761 1762 1763 1764 1765 1766 1767 1768
                self.activeTable.add(i)  # marking it active
            # No need to shuffle data sequence, unless later we decide to do
            # non-increment insertion
            regTableName = self.getRegTableName(
                i)  # "db.reg_table_{}".format(i)
            for j in range(
                    self.LARGE_NUMBER_OF_RECORDS if gConfig.larger_data else self.SMALL_NUMBER_OF_RECORDS):  # number of records per table
                nextInt = ds.getNextInt()
1769 1770
                if gConfig.record_ops:
                    self.prepToRecordOps()
S
Shuduo Sang 已提交
1771 1772 1773
                    self.fAddLogReady.write(
                        "Ready to write {} to {}\n".format(
                            nextInt, regTableName))
1774 1775 1776
                    self.fAddLogReady.flush()
                    os.fsync(self.fAddLogReady)
                sql = "insert into {} using {} tags ('{}', {}) values ('{}', {});".format(
S
Shuduo Sang 已提交
1777 1778
                    regTableName,
                    ds.getFixedSuperTableName(),
1779
                    ds.getNextBinary(), ds.getNextFloat(),
1780
                    ds.getNextTick(), nextInt)
S
Shuduo Sang 已提交
1781 1782 1783
                self.execWtSql(wt, sql)
                # Successfully wrote the data into the DB, let's record it
                # somehow
1784
                te.recordDataMark(nextInt)
1785
                if gConfig.record_ops:
S
Shuduo Sang 已提交
1786 1787 1788
                    self.fAddLogDone.write(
                        "Wrote {} to {}\n".format(
                            nextInt, regTableName))
1789 1790
                    self.fAddLogDone.flush()
                    os.fsync(self.fAddLogDone)
S
Shuduo Sang 已提交
1791
            self.activeTable.discard(i)  # not raising an error, unlike remove
1792 1793


S
Steven Li 已提交
1794 1795
# Deterministic random number generator
class Dice():
S
Shuduo Sang 已提交
1796
    seeded = False  # static, uninitialized
S
Steven Li 已提交
1797 1798

    @classmethod
S
Shuduo Sang 已提交
1799
    def seed(cls, s):  # static
S
Steven Li 已提交
1800
        if (cls.seeded):
S
Shuduo Sang 已提交
1801 1802
            raise RuntimeError(
                "Cannot seed the random generator more than once")
S
Steven Li 已提交
1803 1804 1805 1806 1807
        cls.verifyRNG()
        random.seed(s)
        cls.seeded = True  # TODO: protect against multi-threading

    @classmethod
S
Shuduo Sang 已提交
1808
    def verifyRNG(cls):  # Verify that the RNG is determinstic
S
Steven Li 已提交
1809 1810 1811 1812
        random.seed(0)
        x1 = random.randrange(0, 1000)
        x2 = random.randrange(0, 1000)
        x3 = random.randrange(0, 1000)
S
Shuduo Sang 已提交
1813
        if (x1 != 864 or x2 != 394 or x3 != 776):
S
Steven Li 已提交
1814 1815 1816
            raise RuntimeError("System RNG is not deterministic")

    @classmethod
S
Shuduo Sang 已提交
1817
    def throw(cls, stop):  # get 0 to stop-1
1818
        return cls.throwRange(0, stop)
S
Steven Li 已提交
1819 1820

    @classmethod
S
Shuduo Sang 已提交
1821 1822
    def throwRange(cls, start, stop):  # up to stop-1
        if (not cls.seeded):
S
Steven Li 已提交
1823
            raise RuntimeError("Cannot throw dice before seeding it")
1824
        return random.randrange(start, stop)
S
Steven Li 已提交
1825 1826 1827


# Anyone needing to carry out work should simply come here
1828 1829 1830 1831 1832 1833 1834 1835 1836 1837
# class WorkDispatcher():
#     def __init__(self, dbState):
#         # self.totalNumMethods = 2
#         self.tasks = [
#             # CreateTableTask(dbState), # Obsolete
#             # DropTableTask(dbState),
#             # AddDataTask(dbState),
#         ]

#     def throwDice(self):
S
Shuduo Sang 已提交
1838
#         max = len(self.tasks) - 1
1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849
#         dRes = random.randint(0, max)
#         # logger.debug("Threw the dice in range [{},{}], and got: {}".format(0,max,dRes))
#         return dRes

#     def pickTask(self):
#         dice = self.throwDice()
#         return self.tasks[dice]

#     def doWork(self, workerThread):
#         task = self.pickTask()
#         task.execute(workerThread)
S
Steven Li 已提交
1850

S
Steven Li 已提交
1851 1852
class LoggingFilter(logging.Filter):
    def filter(self, record: logging.LogRecord):
S
Shuduo Sang 已提交
1853 1854
        if (record.levelno >= logging.INFO):
            return True  # info or above always log
S
Steven Li 已提交
1855

S
Steven Li 已提交
1856 1857 1858 1859
        # Commenting out below to adjust...

        # if msg.startswith("[TRD]"):
        #     return False
S
Steven Li 已提交
1860 1861
        return True

S
Shuduo Sang 已提交
1862 1863

class MyLoggingAdapter(logging.LoggerAdapter):
1864 1865 1866 1867
    def process(self, msg, kwargs):
        return "[{}]{}".format(threading.get_ident() % 10000, msg), kwargs
        # return '[%s] %s' % (self.extra['connid'], msg), kwargs

S
Shuduo Sang 已提交
1868 1869

class SvcManager:
1870
    MAX_QUEUE_SIZE = 10000
S
Shuduo Sang 已提交
1871

1872 1873 1874 1875
    def __init__(self):
        print("Starting service manager")
        signal.signal(signal.SIGTERM, self.sigIntHandler)
        signal.signal(signal.SIGINT, self.sigIntHandler)
1876
        signal.signal(signal.SIGUSR1, self.sigUsrHandler)
1877 1878 1879
        self.ioThread = None
        self.subProcess = None
        self.shouldStop = False
S
Shuduo Sang 已提交
1880 1881
        # self.status = MainExec.STATUS_RUNNING # set inside
        # _startTaosService()
1882 1883

    def svcOutputReader(self, out: IO, queue):
S
Shuduo Sang 已提交
1884 1885
        # Important Reference:
        # https://stackoverflow.com/questions/375427/non-blocking-read-on-a-subprocess-pipe-in-python
1886
        print("This is the svcOutput Reader...")
S
Shuduo Sang 已提交
1887
        # for line in out :
1888
        for line in iter(out.readline, b''):
1889
            # print("Finished reading a line: {}".format(line))
1890 1891
            # print("Adding item to queue...")
            line = line.decode("utf-8").rstrip()
S
Shuduo Sang 已提交
1892 1893 1894
            # This might block, and then causing "out" buffer to block
            queue.put(line)
            print("_i", end="", flush=True)
1895 1896 1897

            # Trim the queue if necessary
            oneTenthQSize = self.MAX_QUEUE_SIZE // 10
S
Shuduo Sang 已提交
1898
            if (queue.qsize() >= (self.MAX_QUEUE_SIZE - oneTenthQSize)):  # 90% full?
1899
                print("Triming IPC queue by: {}".format(oneTenthQSize))
S
Shuduo Sang 已提交
1900
                for i in range(0, oneTenthQSize):
1901 1902 1903
                    try:
                        queue.get_nowait()
                    except Empty:
S
Shuduo Sang 已提交
1904
                        break  # break out of for loop, no more trimming
1905

S
Shuduo Sang 已提交
1906
            if self.shouldStop:
1907 1908 1909 1910
                print("Stopping to read output from sub process")
                break

            # queue.put(line)
S
Shuduo Sang 已提交
1911 1912
        # meaning sub process must have died
        print("\nNo more output (most likely) from IO thread managing TDengine service")
1913 1914
        out.close()

1915 1916 1917 1918 1919 1920 1921 1922
    def _doMenu(self):
        choice = ""
        while True:
            print("\nInterrupting Service Program, Choose an Action: ")
            print("1: Resume")
            print("2: Terminate")
            print("3: Restart")
            # Remember to update the if range below
S
Shuduo Sang 已提交
1923
            # print("Enter Choice: ", end="", flush=True)
1924 1925 1926
            while choice == "":
                choice = input("Enter Choice: ")
                if choice != "":
S
Shuduo Sang 已提交
1927 1928 1929
                    break  # done with reading repeated input
            if choice in ["1", "2", "3"]:
                break  # we are done with whole method
1930
            print("Invalid choice, please try again.")
S
Shuduo Sang 已提交
1931
            choice = ""  # reset
1932 1933
        return choice

S
Shuduo Sang 已提交
1934
    def sigUsrHandler(self, signalNumber, frame):
1935
        print("Interrupting main thread execution upon SIGUSR1")
S
Shuduo Sang 已提交
1936
        if self.status != MainExec.STATUS_RUNNING:
1937
            print("Ignoring repeated SIG...")
S
Shuduo Sang 已提交
1938
            return  # do nothing if it's already not running
1939 1940 1941
        self.status = MainExec.STATUS_STOPPING

        choice = self._doMenu()
S
Shuduo Sang 已提交
1942 1943 1944 1945 1946
        if choice == "1":
            # TODO: can the sub-process be blocked due to us not reading from
            # queue?
            self.sigHandlerResume()
        elif choice == "2":
1947
            self.stopTaosService()
S
Shuduo Sang 已提交
1948
        elif choice == "3":
1949 1950
            self.stopTaosService()
            self.startTaosService()
1951 1952
        else:
            raise RuntimeError("Invalid menu choice: {}".format(choice))
1953 1954

    def sigIntHandler(self, signalNumber, frame):
1955
        print("Sig INT Handler starting...")
S
Shuduo Sang 已提交
1956
        if self.status != MainExec.STATUS_RUNNING:
1957 1958
            print("Ignoring repeated SIG_INT...")
            return
1959

S
Shuduo Sang 已提交
1960 1961 1962
        self.status = MainExec.STATUS_STOPPING  # immediately set our status
        self.stopTaosService()
        print("INT signal handler returning...")
1963

S
Shuduo Sang 已提交
1964
    def sigHandlerResume(self):
1965 1966
        print("Resuming TDengine service manager thread (main thread)...\n\n")
        self.status = MainExec.STATUS_RUNNING
1967 1968

    def joinIoThread(self):
S
Shuduo Sang 已提交
1969
        if self.ioThread:
1970
            self.ioThread.join()
S
Shuduo Sang 已提交
1971 1972
            self.ioThread = None
        else:
1973
            print("Joining empty thread, doing nothing")
S
Shuduo Sang 已提交
1974

1975
    TD_READY_MSG = "TDengine is initialized successfully"
S
Shuduo Sang 已提交
1976

1977
    def _procIpcBatch(self):
S
Shuduo Sang 已提交
1978 1979 1980 1981 1982 1983 1984 1985
        # Process all the output generated by the underlying sub process,
        # managed by IO thread
        while True:
            try:
                line = self.ipcQueue.get_nowait()  # getting output at fast speed
                print("_o", end="", flush=True)
                if self.status == MainExec.STATUS_STARTING:  # we are starting, let's see if we have started
                    if line.find(self.TD_READY_MSG) != -1:  # found
1986
                        self.status = MainExec.STATUS_RUNNING
S
Shuduo Sang 已提交
1987

1988 1989 1990
            except Empty:
                # time.sleep(2.3) # wait only if there's no output
                # no more output
S
Shuduo Sang 已提交
1991 1992
                return  # we are done with THIS BATCH
            else:  # got line
1993
                print(line)
1994

1995
    def _procIpcAll(self):
S
Shuduo Sang 已提交
1996
        while True:
1997
            print("<", end="", flush=True)
S
Shuduo Sang 已提交
1998
            self._procIpcBatch()  # process one batch
1999 2000 2001

            # check if the ioThread is still running
            if (not self.ioThread) or (not self.ioThread.is_alive()):
S
Shuduo Sang 已提交
2002 2003
                print(
                    "IO Thread (with subprocess) has ended, main thread now exiting...")
2004
                self.stopTaosService()
S
Shuduo Sang 已提交
2005 2006
                self._procIpcBatch()  # one more batch
                return  # TODO: maybe one last batch?
2007 2008 2009 2010

            # Maybe handler says we should exit now
            if self.shouldStop:
                print("Main thread ending all IPC processing with IOThread/SubProcess")
S
Shuduo Sang 已提交
2011
                self._procIpcBatch()  # one more batch
2012 2013
                return

2014
            print(">", end="", flush=True)
2015 2016
            time.sleep(0.5)

S
Shuduo Sang 已提交
2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031
    def getBuildPath(self):
        selfPath = os.path.dirname(os.path.realpath(__file__))
        if ("community" in selfPath):
            projPath = selfPath[:selfPath.find("communit")]
        else:
            projPath = selfPath[:selfPath.find("tests")]

        for root, dirs, files in os.walk(projPath):
            if ("taosd" in files):
                rootRealPath = os.path.dirname(os.path.realpath(root))
                if ("packaging" not in rootRealPath):
                    buildPath = root[:len(root) - len("/build/bin")]
                    break
        return buildPath

2032
    def startTaosService(self):
2033
        ON_POSIX = 'posix' in sys.builtin_module_names
S
Shuduo Sang 已提交
2034

2035 2036 2037
        taosdPath = self.getBuildPath() + "/build/bin/taosd"
        cfgPath = self.getBuildPath() + "/test/cfg"

S
Shuduo Sang 已提交
2038
        svcCmd = [taosdPath, '-c', cfgPath]
2039
        # svcCmd = ['vmstat', '1']
S
Shuduo Sang 已提交
2040
        if self.subProcess:  # already there
2041 2042
            raise RuntimeError("Corrupt process state")

S
Shuduo Sang 已提交
2043
        self.subProcess = subprocess.Popen(
S
Shuduo Sang 已提交
2044 2045
            svcCmd,
            stdout=subprocess.PIPE,
2046
            # bufsize=1, # not supported in binary mode
S
Shuduo Sang 已提交
2047
            close_fds=ON_POSIX)  # had text=True, which interferred with reading EOF
2048 2049
        self.ipcQueue = Queue()

S
Shuduo Sang 已提交
2050
        if self.ioThread:
2051
            raise RuntimeError("Corrupt thread state")
S
Shuduo Sang 已提交
2052 2053 2054 2055
        self.ioThread = threading.Thread(
            target=self.svcOutputReader, args=(
                self.subProcess.stdout, self.ipcQueue))
        self.ioThread.daemon = True  # thread dies with the program
2056 2057
        self.ioThread.start()

S
Shuduo Sang 已提交
2058
        self.shouldStop = False  # don't let the main loop stop
2059 2060 2061
        self.status = MainExec.STATUS_STARTING

        # wait for service to start
S
Shuduo Sang 已提交
2062
        for i in range(0, 10):
2063
            time.sleep(1.0)
S
Shuduo Sang 已提交
2064
            self._procIpcBatch()  # pump messages
2065
            print("_zz_", end="", flush=True)
S
Shuduo Sang 已提交
2066
            if self.status == MainExec.STATUS_RUNNING:
2067
                print("TDengine service READY to process requests")
S
Shuduo Sang 已提交
2068 2069 2070
                return  # now we've started
        # TODO: handle this better?
        raise RuntimeError("TDengine service did not start successfully")
2071 2072

    def stopTaosService(self):
2073 2074
        # can be called from both main thread or signal handler
        print("Terminating TDengine service running as the sub process...")
S
Shuduo Sang 已提交
2075 2076 2077 2078
        # Linux will send Control-C generated SIGINT to the TDengine process
        # already, ref:
        # https://unix.stackexchange.com/questions/176235/fork-and-how-signals-are-delivered-to-processes
        if not self.subProcess:
2079 2080 2081 2082
            print("Process already stopped")
            return

        retCode = self.subProcess.poll()
S
Shuduo Sang 已提交
2083
        if retCode:  # valid return code, process ended
2084
            self.subProcess = None
S
Shuduo Sang 已提交
2085 2086 2087 2088 2089 2090 2091
        else:  # process still alive, let's interrupt it
            print(
                "Sub process still running, sending SIG_INT and waiting for it to stop...")
            # sub process should end, then IPC queue should end, causing IO
            # thread to end
            self.subProcess.send_signal(signal.SIGINT)
            try:
2092 2093 2094 2095
                self.subProcess.wait(10)
            except subprocess.TimeoutExpired as err:
                print("Time out waiting for TDengine service process to exit")
            else:
2096
                print("TDengine service process terminated successfully from SIG_INT")
2097 2098 2099
                self.subProcess = None

        if self.subProcess and (not self.subProcess.poll()):
S
Shuduo Sang 已提交
2100 2101 2102 2103
            print(
                "Sub process is still running... pid = {}".format(
                    self.subProcess.pid))

2104 2105 2106 2107
        self.shouldStop = True
        self.joinIoThread()

    def run(self):
2108
        self.startTaosService()
2109

S
Shuduo Sang 已提交
2110
        # proc = subprocess.Popen(['echo', '"to stdout"'],
2111 2112 2113 2114 2115
        #                 stdout=subprocess.PIPE,
        #                 )
        # stdout_value = proc.communicate()[0]
        # print('\tstdout: {}'.format(repr(stdout_value)))

2116
        self._procIpcAll()
2117

2118
        print("End of loop reading from IPC queue")
S
Shuduo Sang 已提交
2119
        self.joinIoThread()  # should have started already
2120
        print("SvcManager Run Finished")
2121

S
Shuduo Sang 已提交
2122

2123 2124 2125 2126 2127 2128 2129 2130 2131 2132
class ClientManager:
    def __init__(self):
        print("Starting service manager")
        signal.signal(signal.SIGTERM, self.sigIntHandler)
        signal.signal(signal.SIGINT, self.sigIntHandler)

        self.status = MainExec.STATUS_RUNNING
        self.tc = None

    def sigIntHandler(self, signalNumber, frame):
S
Shuduo Sang 已提交
2133
        if self.status != MainExec.STATUS_RUNNING:
2134
            print("Ignoring repeated SIGINT...")
S
Shuduo Sang 已提交
2135 2136
            return  # do nothing if it's already not running
        self.status = MainExec.STATUS_STOPPING  # immediately set our status
2137 2138 2139 2140

        print("Terminating program...")
        self.tc.requestToStop()

S
Shuduo Sang 已提交
2141
    def _printLastNumbers(self):  # to verify data durability
2142 2143
        dbManager = DbManager(resetDb=False)
        dbc = dbManager.getDbConn()
S
Shuduo Sang 已提交
2144
        if dbc.query("show databases") == 0:  # no databae
2145
            return
S
Shuduo Sang 已提交
2146
        if dbc.query("show tables") == 0:  # no tables
S
Steven Li 已提交
2147
            return
2148 2149

        dbc.execute("use db")
S
Shuduo Sang 已提交
2150
        sTbName = dbManager.getFixedSuperTableName()
2151 2152

        # get all regular tables
S
Shuduo Sang 已提交
2153 2154
        # TODO: analyze result set later
        dbc.query("select TBNAME from db.{}".format(sTbName))
2155 2156 2157
        rTables = dbc.getQueryResult()

        bList = TaskExecutor.BoundedList()
S
Shuduo Sang 已提交
2158
        for rTbName in rTables:  # regular tables
2159 2160
            dbc.query("select speed from db.{}".format(rTbName[0]))
            numbers = dbc.getQueryResult()
S
Shuduo Sang 已提交
2161
            for row in numbers:
2162 2163 2164 2165 2166 2167
                # print("<{}>".format(n), end="", flush=True)
                bList.add(row[0])

        print("Top numbers in DB right now: {}".format(bList))
        print("TDengine client execution is about to start in 2 seconds...")
        time.sleep(2.0)
S
Shuduo Sang 已提交
2168
        dbManager = None  # release?
2169 2170 2171 2172 2173

    def prepare(self):
        self._printLastNumbers()

    def run(self):
S
Shuduo Sang 已提交
2174
        if gConfig.auto_start_service:
2175 2176 2177
            svcMgr = SvcManager()
            svcMgr.startTaosService()

2178 2179
        self._printLastNumbers()

S
Shuduo Sang 已提交
2180 2181
        dbManager = DbManager()  # Regular function
        Dice.seed(0)  # initial seeding of dice
2182
        thPool = ThreadPool(gConfig.num_threads, gConfig.max_steps)
2183
        self.tc = ThreadCoordinator(thPool, dbManager)
S
Shuduo Sang 已提交
2184

2185
        self.tc.run()
S
Steven Li 已提交
2186 2187
        # print("exec stats: {}".format(self.tc.getExecStats()))
        # print("TC failed = {}".format(self.tc.isFailed()))
2188
        self.conclude()
S
Shuduo Sang 已提交
2189
        if gConfig.auto_start_service:
2190
            svcMgr.stopTaosService()
S
Steven Li 已提交
2191
        # print("TC failed (2) = {}".format(self.tc.isFailed()))
S
Shuduo Sang 已提交
2192 2193
        # Linux return code: ref https://shapeshed.com/unix-exit-codes/
        return 1 if self.tc.isFailed() else 0
2194 2195 2196

    def conclude(self):
        self.tc.printStats()
S
Shuduo Sang 已提交
2197
        self.tc.getDbManager().cleanUp()
2198 2199 2200


class MainExec:
2201 2202 2203
    STATUS_STARTING = 1
    STATUS_RUNNING = 2
    STATUS_STOPPING = 3
2204 2205 2206 2207 2208
    # STATUS_STOPPED = 3 # Not used yet

    @classmethod
    def runClient(cls):
        clientManager = ClientManager()
S
Steven Li 已提交
2209
        return clientManager.run()
2210 2211 2212

    @classmethod
    def runService(cls):
2213 2214
        svcManager = SvcManager()
        svcManager.run()
2215 2216

    @classmethod
S
Shuduo Sang 已提交
2217
    def runTemp(cls):  # for debugging purposes
2218 2219
        # # Hack to exercise reading from disk, imcreasing coverage. TODO: fix
        # dbc = dbState.getDbConn()
S
Shuduo Sang 已提交
2220
        # sTbName = dbState.getFixedSuperTableName()
2221 2222
        # dbc.execute("create database if not exists db")
        # if not dbState.getState().equals(StateEmpty()):
S
Shuduo Sang 已提交
2223
        #     dbc.execute("use db")
2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234

        # rTables = None
        # try: # the super table may not exist
        #     sql = "select TBNAME from db.{}".format(sTbName)
        #     logger.info("Finding out tables in super table: {}".format(sql))
        #     dbc.query(sql) # TODO: analyze result set later
        #     logger.info("Fetching result")
        #     rTables = dbc.getQueryResult()
        #     logger.info("Result: {}".format(rTables))
        # except taos.error.ProgrammingError as err:
        #     logger.info("Initial Super table OPS error: {}".format(err))
S
Shuduo Sang 已提交
2235

2236 2237 2238 2239 2240 2241 2242 2243
        # # sys.exit()
        # if ( not rTables == None):
        #     # print("rTables[0] = {}, type = {}".format(rTables[0], type(rTables[0])))
        #     try:
        #         for rTbName in rTables : # regular tables
        #             ds = dbState
        #             logger.info("Inserting into table: {}".format(rTbName[0]))
        #             sql = "insert into db.{} values ('{}', {});".format(
S
Shuduo Sang 已提交
2244
        #                 rTbName[0],
2245 2246
        #                 ds.getNextTick(), ds.getNextInt())
        #             dbc.execute(sql)
S
Shuduo Sang 已提交
2247
        #         for rTbName in rTables : # regular tables
2248
        #             dbc.query("select * from db.{}".format(rTbName[0])) # TODO: check success failure
S
Shuduo Sang 已提交
2249
        #         logger.info("Initial READING operation is successful")
2250
        #     except taos.error.ProgrammingError as err:
S
Shuduo Sang 已提交
2251 2252
        #         logger.info("Initial WRITE/READ error: {}".format(err))

2253 2254 2255
        # Sandbox testing code
        # dbc = dbState.getDbConn()
        # while True:
S
Shuduo Sang 已提交
2256
        #     rows = dbc.query("show databases")
2257
        #     print("Rows: {}, time={}".format(rows, time.time()))
S
Shuduo Sang 已提交
2258 2259
        return

S
Steven Li 已提交
2260

2261
def main():
S
Shuduo Sang 已提交
2262 2263
    # Super cool Python argument library:
    # https://docs.python.org/3/library/argparse.html
2264 2265 2266 2267 2268 2269 2270 2271 2272
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent('''\
            TDengine Auto Crash Generator (PLEASE NOTICE the Prerequisites Below)
            ---------------------------------------------------------------------
            1. You build TDengine in the top level ./build directory, as described in offical docs
            2. You run the server there before this script: ./build/bin/taosd -c test/cfg

            '''))
2273

S
Shuduo Sang 已提交
2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324
    parser.add_argument(
        '-a',
        '--auto-start-service',
        action='store_true',
        help='Automatically start/stop the TDengine service (default: false)')
    parser.add_argument(
        '-c',
        '--connector-type',
        action='store',
        default='native',
        type=str,
        help='Connector type to use: native, rest, or mixed (default: 10)')
    parser.add_argument(
        '-d',
        '--debug',
        action='store_true',
        help='Turn on DEBUG mode for more logging (default: false)')
    parser.add_argument(
        '-e',
        '--run-tdengine',
        action='store_true',
        help='Run TDengine service in foreground (default: false)')
    parser.add_argument(
        '-l',
        '--larger-data',
        action='store_true',
        help='Write larger amount of data during write operations (default: false)')
    parser.add_argument(
        '-p',
        '--per-thread-db-connection',
        action='store_true',
        help='Use a single shared db connection (default: false)')
    parser.add_argument(
        '-r',
        '--record-ops',
        action='store_true',
        help='Use a pair of always-fsynced fils to record operations performing + performed, for power-off tests (default: false)')
    parser.add_argument(
        '-s',
        '--max-steps',
        action='store',
        default=1000,
        type=int,
        help='Maximum number of steps to run (default: 100)')
    parser.add_argument(
        '-t',
        '--num-threads',
        action='store',
        default=5,
        type=int,
        help='Number of threads to run (default: 10)')
2325

2326
    global gConfig
2327
    gConfig = parser.parse_args()
2328

2329 2330 2331
    # if len(sys.argv) == 1:
    #     parser.print_help()
    #     sys.exit()
S
Shuduo Sang 已提交
2332

2333
    # Logging Stuff
2334
    global logger
S
Shuduo Sang 已提交
2335 2336
    _logger = logging.getLogger('CrashGen')  # real logger
    _logger.addFilter(LoggingFilter())
2337 2338 2339
    ch = logging.StreamHandler()
    _logger.addHandler(ch)

S
Shuduo Sang 已提交
2340 2341
    # Logging adapter, to be used as a logger
    logger = MyLoggingAdapter(_logger, [])
2342

S
Shuduo Sang 已提交
2343 2344
    if (gConfig.debug):
        logger.setLevel(logging.DEBUG)  # default seems to be INFO
S
Steven Li 已提交
2345 2346
    else:
        logger.setLevel(logging.INFO)
S
Shuduo Sang 已提交
2347

2348
    # Run server or client
S
Shuduo Sang 已提交
2349
    if gConfig.run_tdengine:  # run server
2350
        MainExec.runService()
S
Shuduo Sang 已提交
2351
    else:
S
Steven Li 已提交
2352
        return MainExec.runClient()
2353

S
Steven Li 已提交
2354
    # logger.info("Crash_Gen execution finished")
2355

S
Shuduo Sang 已提交
2356

2357
if __name__ == "__main__":
S
Steven Li 已提交
2358 2359 2360
    exitCode = main()
    # print("Exiting with code: {}".format(exitCode))
    sys.exit(exitCode)