提交 a519389d 编写于 作者: S Steven Li

Now supporting CTRL-C abort in Crash_Gen, plus tracking top numbers

上级 88ac7bb6
...@@ -15,6 +15,8 @@ from __future__ import annotations # For type hinting before definition, ref: h ...@@ -15,6 +15,8 @@ from __future__ import annotations # For type hinting before definition, ref: h
import sys import sys
import os import os
import io
import signal
import traceback import traceback
# Require Python 3 # Require Python 3
if sys.version_info[0] < 3: if sys.version_info[0] < 3:
...@@ -36,6 +38,8 @@ from requests.auth import HTTPBasicAuth ...@@ -36,6 +38,8 @@ from requests.auth import HTTPBasicAuth
from typing import List from typing import List
from typing import Dict from typing import Dict
from typing import Set from typing import Set
from typing import IO
from queue import Queue, Empty
from util.log import * from util.log import *
from util.dnodes import * from util.dnodes import *
...@@ -205,6 +209,7 @@ class WorkerThread: ...@@ -205,6 +209,7 @@ class WorkerThread:
# else: # else:
# return self._tc.getDbState().getDbConn().query(sql) # return self._tc.getDbState().getDbConn().query(sql)
# The coordinator of all worker threads, mostly running in main thread
class ThreadCoordinator: class ThreadCoordinator:
def __init__(self, pool: ThreadPool, dbManager): def __init__(self, pool: ThreadPool, dbManager):
self._curStep = -1 # first step is 0 self._curStep = -1 # first step is 0
...@@ -217,6 +222,7 @@ class ThreadCoordinator: ...@@ -217,6 +222,7 @@ class ThreadCoordinator:
self._stepBarrier = threading.Barrier(self._pool.numThreads + 1) # one barrier for all threads self._stepBarrier = threading.Barrier(self._pool.numThreads + 1) # one barrier for all threads
self._execStats = ExecutionStats() self._execStats = ExecutionStats()
self._runStatus = MainExec.STATUS_RUNNING
def getTaskExecutor(self): def getTaskExecutor(self):
return self._te return self._te
...@@ -227,6 +233,10 @@ class ThreadCoordinator: ...@@ -227,6 +233,10 @@ class ThreadCoordinator:
def crossStepBarrier(self): def crossStepBarrier(self):
self._stepBarrier.wait() self._stepBarrier.wait()
def requestToStop(self):
    """Ask the coordinator to wind down after a user interruption.

    Switches `_runStatus` to `MainExec.STATUS_STOPPING` (the main loop in
    `run()` only keeps stepping while the status is STATUS_RUNNING) and
    registers the interruption as the failure reason in the stats.
    """
    stopReason = "User Interruption"
    self._runStatus = MainExec.STATUS_STOPPING
    self._execStats.registerFailure(stopReason)
def run(self): def run(self):
self._pool.createAndStartThreads(self) self._pool.createAndStartThreads(self)
...@@ -234,41 +244,56 @@ class ThreadCoordinator: ...@@ -234,41 +244,56 @@ class ThreadCoordinator:
self._curStep = -1 # not started yet self._curStep = -1 # not started yet
maxSteps = gConfig.max_steps # type: ignore maxSteps = gConfig.max_steps # type: ignore
self._execStats.startExec() # start the stop watch self._execStats.startExec() # start the stop watch
failed = False transitionFailed = False
while(self._curStep < maxSteps-1 and not failed): # maxStep==10, last curStep should be 9 hasAbortedTask = False
while(self._curStep < maxSteps-1 and
(not transitionFailed) and
(self._runStatus==MainExec.STATUS_RUNNING) and
(not hasAbortedTask)): # maxStep==10, last curStep should be 9
if not gConfig.debug: if not gConfig.debug:
print(".", end="", flush=True) # print this only if we are not in debug mode print(".", end="", flush=True) # print this only if we are not in debug mode
logger.debug("[TRD] Main thread going to sleep") logger.debug("[TRD] Main thread going to sleep")
# Now ready to enter a step # Now main thread (that's us) is ready to enter a step
self.crossStepBarrier() # let other threads go past the pool barrier, but wait at the thread gate self.crossStepBarrier() # let other threads go past the pool barrier, but wait at the thread gate
self._stepBarrier.reset() # Other worker threads should now be at the "gate" self._stepBarrier.reset() # Other worker threads should now be at the "gate"
# At this point, all threads should be pass the overall "barrier" and before the per-thread "gate" # At this point, all threads should be pass the overall "barrier" and before the per-thread "gate"
try: # We use this period to do house keeping work, when all worker threads are QUIET.
sm = self._dbManager.getStateMachine() hasAbortedTask = False
logger.debug("[STT] starting transitions") for task in self._executedTasks :
sm.transition(self._executedTasks) # at end of step, transiton the DB state if task.isAborted() :
logger.debug("[STT] transition ended") print("Task aborted: {}".format(task))
# Due to limitation (or maybe not) of the Python library, we cannot share connections across threads hasAbortedTask = True
if sm.hasDatabase() : break
for t in self._pool.threadList:
logger.debug("[DB] use db for all worker threads") if hasAbortedTask : # do transition only if tasks are error free
t.useDb() self._execStats.registerFailure("Aborted Task Encountered")
# t.execSql("use db") # main thread executing "use db" on behalf of every worker thread else:
try:
except taos.error.ProgrammingError as err: sm = self._dbManager.getStateMachine()
if ( err.msg == 'network unavailable' ): # broken DB connection logger.debug("[STT] starting transitions")
logger.info("DB connection broken, execution failed") sm.transition(self._executedTasks) # at end of step, transiton the DB state
traceback.print_stack() logger.debug("[STT] transition ended")
failed = True # Due to limitation (or maybe not) of the Python library, we cannot share connections across threads
self._te = None # Not running any more if sm.hasDatabase() :
self._execStats.registerFailure("Broken DB Connection") for t in self._pool.threadList:
# continue # don't do that, need to tap all threads at end, and maybe signal them to stop logger.debug("[DB] use db for all worker threads")
else: t.useDb()
raise # t.execSql("use db") # main thread executing "use db" on behalf of every worker thread
finally: except taos.error.ProgrammingError as err:
pass if ( err.msg == 'network unavailable' ): # broken DB connection
logger.info("DB connection broken, execution failed")
traceback.print_stack()
transitionFailed = True
self._te = None # Not running any more
self._execStats.registerFailure("Broken DB Connection")
# continue # don't do that, need to tap all threads at end, and maybe signal them to stop
else:
raise
# finally:
# pass
self.resetExecutedTasks() # clear the tasks after we are done self.resetExecutedTasks() # clear the tasks after we are done
...@@ -278,14 +303,14 @@ class ThreadCoordinator: ...@@ -278,14 +303,14 @@ class ThreadCoordinator:
logger.debug("\r\n\n--> Step {} starts with main thread waking up".format(self._curStep)) # Now not all threads had time to go to sleep logger.debug("\r\n\n--> Step {} starts with main thread waking up".format(self._curStep)) # Now not all threads had time to go to sleep
# A new TE for the new step # A new TE for the new step
if not failed: # only if not failed if not transitionFailed: # only if not failed
self._te = TaskExecutor(self._curStep) self._te = TaskExecutor(self._curStep)
logger.debug("[TRD] Main thread waking up at step {}, tapping worker threads".format(self._curStep)) # Now not all threads had time to go to sleep logger.debug("[TRD] Main thread waking up at step {}, tapping worker threads".format(self._curStep)) # Now not all threads had time to go to sleep
self.tapAllThreads() self.tapAllThreads() # Worker threads will wake up at this point, and each execute it's own task
logger.debug("Main thread ready to finish up...") logger.debug("Main thread ready to finish up...")
if not failed: # only in regular situations if not transitionFailed: # only in regular situations
self.crossStepBarrier() # Cross it one last time, after all threads finish self.crossStepBarrier() # Cross it one last time, after all threads finish
self._stepBarrier.reset() self._stepBarrier.reset()
logger.debug("Main thread in exclusive zone...") logger.debug("Main thread in exclusive zone...")
...@@ -298,8 +323,8 @@ class ThreadCoordinator: ...@@ -298,8 +323,8 @@ class ThreadCoordinator:
logger.info("\nAll worker threads finished") logger.info("\nAll worker threads finished")
self._execStats.endExec() self._execStats.endExec()
def logStats(self): def printStats(self):
self._execStats.logStats() self._execStats.printStats()
def tapAllThreads(self): # in a deterministic manner def tapAllThreads(self): # in a deterministic manner
wakeSeq = [] wakeSeq = []
...@@ -1061,15 +1086,60 @@ class DbManager(): ...@@ -1061,15 +1086,60 @@ class DbManager():
self._dbConn.close() self._dbConn.close()
class TaskExecutor(): class TaskExecutor():
class BoundedList:
    """Keep the `size` largest numbers added so far, sorted ascending.

    Duplicates are retained. Once more than `size` values are held, the
    smallest ones are evicted.
    """

    def __init__(self, size = 10):
        """Create an empty list retaining at most `size` values."""
        self._size = size  # maximum number of values retained
        self._list = []  # retained values, always in ascending order

    def add(self, n: int):
        """Record `n`, evicting the smallest retained value on overflow.

        The previous implementation used the first (smallest) item as a
        gate and silently dropped every value not greater than it, even
        while the list still had spare capacity; e.g. adding 100 and then 5
        kept only [100]. Values are now always inserted and only the
        overflow is trimmed, so the list really holds the top `size` values.
        """
        from bisect import insort  # function-scope import keeps the block self-contained

        insort(self._list, n)  # sorted insert, keeps ascending order
        overflow = len(self._list) - self._size
        if overflow > 0:
            del self._list[:overflow]  # drop the smallest value(s)

    def __str__(self):
        """Return the retained values formatted like a plain list."""
        return repr(self._list)
_boundedList = BoundedList()
def __init__(self, curStep): def __init__(self, curStep):
self._curStep = curStep self._curStep = curStep
@classmethod
def getBoundedList(cls):
    """Return the object stored in the class attribute `_boundedList`."""
    tracker = cls._boundedList
    return tracker
def getCurStep(self): def getCurStep(self):
return self._curStep return self._curStep
def execute(self, task: Task, wt: WorkerThread): # execute a task on a thread def execute(self, task: Task, wt: WorkerThread): # execute a task on a thread
task.execute(wt) task.execute(wt)
def recordDataMark(self, n: int):
    """Feed the written value `n` into the `_boundedList` tracker."""
    tracker = self._boundedList
    tracker.add(n)
# def logInfo(self, msg): # def logInfo(self, msg):
# logger.info(" T[{}.x]: ".format(self._curStep) + msg) # logger.info(" T[{}.x]: ".format(self._curStep) + msg)
...@@ -1089,6 +1159,7 @@ class Task(): ...@@ -1089,6 +1159,7 @@ class Task():
self._dbManager = dbManager self._dbManager = dbManager
self._workerThread = None self._workerThread = None
self._err = None self._err = None
self._aborted = False
self._curStep = None self._curStep = None
self._numRows = None # Number of rows affected self._numRows = None # Number of rows affected
...@@ -1102,6 +1173,9 @@ class Task(): ...@@ -1102,6 +1173,9 @@ class Task():
def isSuccess(self): def isSuccess(self):
return self._err == None return self._err == None
def isAborted(self):
    """Tell whether this task has been flagged as aborted (`_aborted`)."""
    abortedFlag = self._aborted
    return abortedFlag
def clone(self): # TODO: why do we need this again? def clone(self): # TODO: why do we need this again?
newTask = self.__class__(self._dbManager, self._execStats) newTask = self.__class__(self._dbManager, self._execStats)
return newTask return newTask
...@@ -1143,7 +1217,9 @@ class Task(): ...@@ -1143,7 +1217,9 @@ class Task():
else: # non-debug else: # non-debug
print("\n\n----------------------------\nProgram ABORTED Due to Unexpected TAOS Error: \n\n{}\n".format(errMsg) + print("\n\n----------------------------\nProgram ABORTED Due to Unexpected TAOS Error: \n\n{}\n".format(errMsg) +
"----------------------------\n") "----------------------------\n")
sys.exit(-1) # sys.exit(-1)
self._err = err
self._aborted = True
except: except:
self.logDebug("[=] Unexpected exception, SQL: {}".format(self._lastSql)) self.logDebug("[=] Unexpected exception, SQL: {}".format(self._lastSql))
raise raise
...@@ -1213,7 +1289,7 @@ class ExecutionStats: ...@@ -1213,7 +1289,7 @@ class ExecutionStats:
self._failed = True self._failed = True
self._failureReason = reason self._failureReason = reason
def logStats(self): def printStats(self):
logger.info("----------------------------------------------------------------------") logger.info("----------------------------------------------------------------------")
logger.info("| Crash_Gen test {}, with the following stats:". logger.info("| Crash_Gen test {}, with the following stats:".
format("FAILED (reason: {})".format(self._failureReason) if self._failed else "SUCCEEDED")) format("FAILED (reason: {})".format(self._failureReason) if self._failed else "SUCCEEDED"))
...@@ -1228,6 +1304,7 @@ class ExecutionStats: ...@@ -1228,6 +1304,7 @@ class ExecutionStats:
logger.info("| Total Task Busy Time (elapsed time when any task is in progress): {:.3f} seconds".format(self._accRunTime)) logger.info("| Total Task Busy Time (elapsed time when any task is in progress): {:.3f} seconds".format(self._accRunTime))
logger.info("| Average Per-Task Execution Time: {:.3f} seconds".format(self._accRunTime/execTimesAny)) logger.info("| Average Per-Task Execution Time: {:.3f} seconds".format(self._accRunTime/execTimesAny))
logger.info("| Total Elapsed Time (from wall clock): {:.3f} seconds".format(self._elapsedTime)) logger.info("| Total Elapsed Time (from wall clock): {:.3f} seconds".format(self._elapsedTime))
logger.info("| Top numbers written: {}".format(TaskExecutor.getBoundedList()))
logger.info("----------------------------------------------------------------------") logger.info("----------------------------------------------------------------------")
...@@ -1449,6 +1526,8 @@ class TaskAddData(StateTransitionTask): ...@@ -1449,6 +1526,8 @@ class TaskAddData(StateTransitionTask):
ds.getNextBinary(), ds.getNextFloat(), ds.getNextBinary(), ds.getNextFloat(),
ds.getNextTick(), nextInt) ds.getNextTick(), nextInt)
self.execWtSql(wt, sql) self.execWtSql(wt, sql)
# Successfully wrote the data into the DB, let's record it somehow
te.recordDataMark(nextInt)
if gConfig.record_ops: if gConfig.record_ops:
self.fAddLogDone.write("Wrote {} to {}\n".format(nextInt, regTableName)) self.fAddLogDone.write("Wrote {} to {}\n".format(nextInt, regTableName))
self.fAddLogDone.flush() self.fAddLogDone.flush()
...@@ -1528,23 +1607,152 @@ class MyLoggingAdapter(logging.LoggerAdapter): ...@@ -1528,23 +1607,152 @@ class MyLoggingAdapter(logging.LoggerAdapter):
return "[{}]{}".format(threading.get_ident() % 10000, msg), kwargs return "[{}]{}".format(threading.get_ident() % 10000, msg), kwargs
# return '[%s] %s' % (self.extra['connid'], msg), kwargs # return '[%s] %s' % (self.extra['connid'], msg), kwargs
class MainExec: class SvcManager:
@classmethod
def runClient(cls): def __init__(self):
# resetDb = False # DEBUG only print("Starting service manager")
# dbState = DbState(resetDb) # DBEUG only! signal.signal(signal.SIGTERM, self.sigIntHandler)
signal.signal(signal.SIGINT, self.sigIntHandler)
self.ioThread = None
self.subProcess = None
self.shouldStop = False
self.status = MainExec.STATUS_RUNNING
def svcOutputReader(self, out: IO, queue):
    """Pump every line of `out` into `queue` until the stream is exhausted.

    Args:
        out: readable text stream; `run()` passes the sub process's stdout.
        queue: object with a `put()` method; receives each line with its
            trailing whitespace (including the newline) removed.
    """
    # `with` closes the stream even when reading raises; the original only
    # closed it on the normal path.
    with out:
        for line in out:
            queue.put(line.rstrip())  # get rid of new lines
        print("No more output from incoming IO")  # meaning sub process must have died
def sigIntHandler(self, signalNumber, frame):
    """Handle SIGINT/SIGTERM: forward SIGINT to the service and stop `run()`.

    Args:
        signalNumber: number of the received signal (unused).
        frame: interrupted stack frame (unused).

    Only the first signal is acted upon; while the manager is no longer in
    STATUS_RUNNING further signals are reported and ignored.
    """
    if self.status != MainExec.STATUS_RUNNING:
        print("Ignoring repeated SIGINT...")
        return  # do nothing if it's already not running

    self.status = MainExec.STATUS_STOPPING  # immediately set our status
    print("Terminating program...")
    # The handlers are installed in __init__, before run() spawns the
    # service, so there may be no sub process to signal yet.
    if self.subProcess is not None:
        self.subProcess.send_signal(signal.SIGINT)
    self.shouldStop = True  # makes the loop in run() exit
    self.joinIoThread()
def joinIoThread(self):
    """Wait for the output-reader thread, if any, then forget about it."""
    if not self.ioThread:
        return  # nothing to join
    self.ioThread.join()
    self.ioThread = None
def run(self):
    """Launch taosd, echo its output, and return once it has been stopped.

    The service's stdout is read by a daemon thread (`svcOutputReader`)
    that feeds a queue; this method drains the queue and prints each line.
    The loop ends when a stop was requested through the signal handler
    (`shouldStop`) or, unlike the original, when the service exits by
    itself and all of its output has been printed. The original kept
    polling an empty queue forever in that case.
    """
    ON_POSIX = 'posix' in sys.builtin_module_names
    svcCmd = ['../../build/build/bin/taosd', '-c', '../../build/test/cfg']
    # svcCmd = ['vmstat', '1']
    self.subProcess = subprocess.Popen(svcCmd, stdout=subprocess.PIPE, bufsize=1, close_fds=ON_POSIX, text=True)
    q = Queue()
    # Keep a local reference: the signal handler may reset self.ioThread.
    reader = threading.Thread(target=self.svcOutputReader, args=(self.subProcess.stdout, q))
    reader.daemon = True  # thread dies with the program
    self.ioThread = reader
    reader.start()

    while True:
        try:
            line = q.get(timeout=2.3)  # wait only if there's no output
        except Empty:
            pass  # no output yet
        else:  # got line
            print(line)
        if self.shouldStop:
            print("Ending main Svc thread")
            break
        if not reader.is_alive() and q.empty():
            # Reader finished and everything it queued has been printed.
            print("Sub process output ended, leaving main Svc loop")
            break

    print("end of loop")
    self.joinIoThread()
    self.subProcess.wait()  # reap the child so it is not left as a zombie
    print("Finished")
class ClientManager:
def __init__(self):
    """Install the SIGTERM/SIGINT handlers and initialise client state."""
    # The message used to read "Starting service manager", copied from
    # SvcManager, which made client and service runs indistinguishable.
    print("Starting client manager")
    signal.signal(signal.SIGTERM, self.sigIntHandler)
    signal.signal(signal.SIGINT, self.sigIntHandler)

    self.status = MainExec.STATUS_RUNNING
    self.tc = None  # thread coordinator, assigned in run()
def sigIntHandler(self, signalNumber, frame):
    """Handle SIGINT/SIGTERM by asking the thread coordinator to stop.

    Args:
        signalNumber: number of the received signal (unused).
        frame: interrupted stack frame (unused).

    Raises:
        KeyboardInterrupt: when the signal arrives before `run()` has
            created the coordinator, so there is nothing to stop gracefully.
    """
    if self.status != MainExec.STATUS_RUNNING:
        print("Ignoring repeated SIGINT...")
        return  # do nothing if it's already not running

    self.status = MainExec.STATUS_STOPPING  # immediately set our status
    print("Terminating program...")
    if self.tc is None:
        # run() prints the last numbers (and sleeps) before it builds the
        # coordinator. Previously a signal in that window raised
        # AttributeError here and every later signal was ignored; abort
        # the way an unhandled Ctrl-C would instead.
        raise KeyboardInterrupt
    self.tc.requestToStop()
def _printLastNumbers(self):  # to verify data durability
    """Print the largest `speed` values currently stored in database `db`.

    Returns early, printing nothing, when "show databases" reports no
    database. Otherwise collects the `speed` column of every regular table
    under the fixed super table into a BoundedList, prints it, and pauses
    for two seconds before returning.
    """
    manager = DbManager(resetDb=False)
    conn = manager.getDbConn()
    if conn.query("show databases") == 0:  # no database
        return

    conn.execute("use db")
    superTable = manager.getFixedSuperTableName()

    # get all regular tables
    conn.query("select TBNAME from db.{}".format(superTable))  # TODO: analyze result set later
    tableRows = conn.getQueryResult()

    topNumbers = TaskExecutor.BoundedList()
    for tableRow in tableRows:  # regular tables
        conn.query("select speed from db.{}".format(tableRow[0]))
        for valueRow in conn.getQueryResult():
            topNumbers.add(valueRow[0])

    print("Top numbers in DB right now: {}".format(topNumbers))
    print("TDengine client execution is about to start in 2 seconds...")
    time.sleep(2.0)
    del manager  # release?
def prepare(self):
    """Delegate to `_printLastNumbers()` and return its result (nothing)."""
    reportLastNumbers = self._printLastNumbers
    reportLastNumbers()
def run(self):
self._printLastNumbers()
dbManager = DbManager() # Regular function dbManager = DbManager() # Regular function
Dice.seed(0) # initial seeding of dice Dice.seed(0) # initial seeding of dice
thPool = ThreadPool(gConfig.num_threads, gConfig.max_steps) thPool = ThreadPool(gConfig.num_threads, gConfig.max_steps)
tc = ThreadCoordinator(thPool, dbManager) self.tc = ThreadCoordinator(thPool, dbManager)
tc.run() self.tc.run()
tc.logStats() self.conclude()
dbManager.cleanUp()
def conclude(self):
    """Print the run statistics, then clean up the coordinator's DB manager."""
    coordinator = self.tc
    coordinator.printStats()
    dbManager = coordinator.getDbManager()
    dbManager.cleanUp()
class MainExec:
STATUS_RUNNING = 1
STATUS_STOPPING = 2
# STATUS_STOPPED = 3 # Not used yet
@classmethod
def runClient(cls):
    """Create a ClientManager and run it."""
    ClientManager().run()
@classmethod @classmethod
def runService(cls): def runService(cls):
print("Running service...") svcManager = SvcManager()
svcManager.run()
@classmethod @classmethod
def runTemp(cls): # for debugging purposes def runTemp(cls): # for debugging purposes
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册