diff --git a/tests/pytest/crash_gen/crash_gen_main.py b/tests/pytest/crash_gen/crash_gen_main.py index d5ffc1b7c1db1b86d238dffb1eb3b78724892ffe..7ab09383bfd0791aad30b39fa7e65ed66df9d936 100755 --- a/tests/pytest/crash_gen/crash_gen_main.py +++ b/tests/pytest/crash_gen/crash_gen_main.py @@ -371,7 +371,9 @@ class ThreadCoordinator: if isinstance(err, CrashGenError): # our own transition failure Logging.info("State transition error") # TODO: saw an error here once, let's print out stack info for err? - traceback.print_stack() + traceback.print_stack() # Stack frame to here. + Logging.info("Caused by:") + traceback.print_exception(*sys.exc_info()) # Ref: https://www.geeksforgeeks.org/how-to-print-exception-stack-trace-in-python/ transitionFailed = True self._te = None # Not running any more self._execStats.registerFailure("State transition error: {}".format(err)) @@ -741,7 +743,8 @@ class AnyState: sCnt += 1 if (sCnt >= 2): raise CrashGenError( - "Unexpected more than 1 success with task: {}, in task set: {}".format( + "Unexpected more than 1 success at state: {}, with task: {}, in task set: {}".format( + self.__class__.__name__, cls.__name__, # verified just now that isinstance(task, cls) [c.__class__.__name__ for c in tasks] )) @@ -756,8 +759,11 @@ class AnyState: if task.isSuccess(): sCnt += 1 if (exists and sCnt <= 0): - raise CrashGenError("Unexpected zero success for task type: {}, from tasks: {}" - .format(cls, tasks)) + raise CrashGenError("Unexpected zero success at state: {}, with task: {}, in task set: {}".format( + self.__class__.__name__, + cls.__name__, # verified just now that isinstance(task, cls) + [c.__class__.__name__ for c in tasks] + )) def assertNoTask(self, tasks, cls): for task in tasks: @@ -809,8 +815,6 @@ class StateEmpty(AnyState): ] def verifyTasksToState(self, tasks, newState): - if Config.getConfig().ignore_errors: # if we are asked to ignore certain errors, let's not verify CreateDB success. - return if (self.hasSuccess(tasks, TaskCreateDb) ): # at EMPTY, if there's succes in creating DB if (not self.hasTask(tasks, TaskDropDb)): # and no drop_db tasks @@ -995,16 +999,17 @@ class StateMechine: dbc.execute("show dnodes") # Generic Checks, first based on the start state - if self._curState.canCreateDb(): - self._curState.assertIfExistThenSuccess(tasks, TaskCreateDb) - # self.assertAtMostOneSuccess(tasks, CreateDbTask) # not really, in - # case of multiple creation and drops - - if self._curState.canDropDb(): - if gSvcMgr == None: # only if we are running as client-only - self._curState.assertIfExistThenSuccess(tasks, TaskDropDb) - # self.assertAtMostOneSuccess(tasks, DropDbTask) # not really in - # case of drop-create-drop + if not Config.getConfig().ignore_errors: # verify state, only if we are asked not to ignore certain errors. + if self._curState.canCreateDb(): + self._curState.assertIfExistThenSuccess(tasks, TaskCreateDb) + # self.assertAtMostOneSuccess(tasks, CreateDbTask) # not really, in + # case of multiple creation and drops + + if self._curState.canDropDb(): + if gSvcMgr == None: # only if we are running as client-only + self._curState.assertIfExistThenSuccess(tasks, TaskDropDb) + # self.assertAtMostOneSuccess(tasks, DropDbTask) # not really in + # case of drop-create-drop # if self._state.canCreateFixedTable(): # self.assertIfExistThenSuccess(tasks, CreateFixedTableTask) # Not true, DB may be dropped @@ -1026,7 +1031,8 @@ class StateMechine: newState = self._findCurrentState(dbc) Logging.debug("[STT] New DB state determined: {}".format(newState)) # can old state move to new state through the tasks? - self._curState.verifyTasksToState(tasks, newState) + if not Config.getConfig().ignore_errors: # verify state, only if we are asked not to ignore certain errors. + self._curState.verifyTasksToState(tasks, newState) self._curState = newState def pickTaskType(self): @@ -2231,16 +2237,14 @@ class TaskAddData(StateTransitionTask): class ThreadStacks: # stack info for all threads def __init__(self): self._allStacks = {} - allFrames = sys._current_frames() # All current stack frames + allFrames = sys._current_frames() # All current stack frames, keyed with "ident" for th in threading.enumerate(): # For each thread - if th.ident is None: - continue - stack = traceback.extract_stack(allFrames[th.ident]) # Get stack for a thread - shortTid = th.ident % 10000 + stack = traceback.extract_stack(allFrames[th.ident]) #type: ignore # Get stack for a thread + shortTid = th.native_id % 10000 #type: ignore self._allStacks[shortTid] = stack # Was using th.native_id def print(self, filteredEndName = None, filterInternal = False): - for tIdent, stack in self._allStacks.items(): # for each thread, stack frames top to bottom + for shortTid, stack in self._allStacks.items(): # for each thread, stack frames top to bottom lastFrame = stack[-1] if filteredEndName: # we need to filter out stacks that match this name if lastFrame.name == filteredEndName : # end did not match @@ -2252,7 +2256,9 @@ class ThreadStacks: # stack info for all threads '__init__']: # the thread that extracted the stack continue # ignore # Now print - print("\n<----- Thread Info for LWP/ID: {} (most recent call last) <-----".format(tIdent)) + print("\n<----- Thread Info for LWP/ID: {} (most recent call last) <-----".format(shortTid)) + lastSqlForThread = DbConn.fetchSqlForThread(shortTid) + print("Last SQL statement attempted from thread {} is: {}".format(shortTid, lastSqlForThread)) stackFrame = 0 for frame in stack: # was using: reversed(stack) # print(frame) diff --git a/tests/pytest/crash_gen/shared/db.py b/tests/pytest/crash_gen/shared/db.py index 75931ace48ed65708c7dfa97d01a426a0baa8203..6da0216d95cefe17512be8f119888bab1d14b7e8 100644 --- a/tests/pytest/crash_gen/shared/db.py +++ b/tests/pytest/crash_gen/shared/db.py @@ -27,6 +27,26 @@ class DbConn: TYPE_REST = "rest-api" TYPE_INVALID = "invalid" + # class variables + lastSqlFromThreads : dict[int, str] = {} # stored by thread id, obtained from threading.current_thread().ident%10000 + + @classmethod + def saveSqlForCurrentThread(cls, sql: str): + ''' + Let us save the last SQL statement on a per-thread basis, so that when later we + run into a dead-lock situation, we can pick out the deadlocked thread, and use + that information to find what what SQL statement is stuck. + ''' + th = threading.current_thread() + shortTid = th.native_id % 10000 #type: ignore + cls.lastSqlFromThreads[shortTid] = sql # Save this for later + + @classmethod + def fetchSqlForThread(cls, shortTid : int) -> str : + if shortTid not in cls.lastSqlFromThreads: + raise CrashGenError("No last-attempted-SQL found for thread id: {}".format(shortTid)) + return cls.lastSqlFromThreads[shortTid] + @classmethod def create(cls, connType, dbTarget): if connType == cls.TYPE_NATIVE: @@ -163,6 +183,7 @@ class DbConnRest(DbConn): def _doSql(self, sql): self._lastSql = sql # remember this, last SQL attempted + self.saveSqlForCurrentThread(sql) # Save in global structure too. #TODO: combine with above try: r = requests.post(self._url, data = sql, @@ -392,6 +413,7 @@ class DbConnNative(DbConn): "Cannot exec SQL unless db connection is open", CrashGenError.DB_CONNECTION_NOT_OPEN) Logging.debug("[SQL] Executing SQL: {}".format(sql)) self._lastSql = sql + self.saveSqlForCurrentThread(sql) # Save in global structure too. #TODO: combine with above nRows = self._tdSql.execute(sql) cls = self.__class__ cls.totalRequests += 1 @@ -407,6 +429,7 @@ class DbConnNative(DbConn): "Cannot query database until connection is open, restarting?", CrashGenError.DB_CONNECTION_NOT_OPEN) Logging.debug("[SQL] Executing SQL: {}".format(sql)) self._lastSql = sql + self.saveSqlForCurrentThread(sql) # Save in global structure too. #TODO: combine with above nRows = self._tdSql.query(sql) cls = self.__class__ cls.totalRequests += 1