Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
taosdata
TDengine
提交
6f3bf5b7
T
TDengine
项目概览
taosdata
/
TDengine
1 年多 前同步成功
通知
1185
Star
22016
Fork
4786
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
TDengine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
6f3bf5b7
编写于
7月 26, 2022
作者:
W
wade zhang
提交者:
GitHub
7月 26, 2022
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #15423 from taosdata/enh/crash_gen
Enhanced crash_gen tool to report better error for deadlocks
上级
292d0f78
d2e2a658
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
53 additions
and
24 deletions
+53
-24
tests/pytest/crash_gen/crash_gen_main.py
tests/pytest/crash_gen/crash_gen_main.py
+30
-24
tests/pytest/crash_gen/shared/db.py
tests/pytest/crash_gen/shared/db.py
+23
-0
未找到文件。
tests/pytest/crash_gen/crash_gen_main.py
浏览文件 @
6f3bf5b7
...
...
@@ -371,7 +371,9 @@ class ThreadCoordinator:
if
isinstance
(
err
,
CrashGenError
):
# our own transition failure
Logging
.
info
(
"State transition error"
)
# TODO: saw an error here once, let's print out stack info for err?
traceback
.
print_stack
()
traceback
.
print_stack
()
# Stack frame to here.
Logging
.
info
(
"Caused by:"
)
traceback
.
print_exception
(
*
sys
.
exc_info
())
# Ref: https://www.geeksforgeeks.org/how-to-print-exception-stack-trace-in-python/
transitionFailed
=
True
self
.
_te
=
None
# Not running any more
self
.
_execStats
.
registerFailure
(
"State transition error: {}"
.
format
(
err
))
...
...
@@ -741,7 +743,8 @@ class AnyState:
sCnt
+=
1
if
(
sCnt
>=
2
):
raise
CrashGenError
(
"Unexpected more than 1 success with task: {}, in task set: {}"
.
format
(
"Unexpected more than 1 success at state: {}, with task: {}, in task set: {}"
.
format
(
self
.
__class__
.
__name__
,
cls
.
__name__
,
# verified just now that isinstance(task, cls)
[
c
.
__class__
.
__name__
for
c
in
tasks
]
))
...
...
@@ -756,8 +759,11 @@ class AnyState:
if
task
.
isSuccess
():
sCnt
+=
1
if
(
exists
and
sCnt
<=
0
):
raise
CrashGenError
(
"Unexpected zero success for task type: {}, from tasks: {}"
.
format
(
cls
,
tasks
))
raise
CrashGenError
(
"Unexpected zero success at state: {}, with task: {}, in task set: {}"
.
format
(
self
.
__class__
.
__name__
,
cls
.
__name__
,
# verified just now that isinstance(task, cls)
[
c
.
__class__
.
__name__
for
c
in
tasks
]
))
def
assertNoTask
(
self
,
tasks
,
cls
):
for
task
in
tasks
:
...
...
@@ -809,8 +815,6 @@ class StateEmpty(AnyState):
]
def
verifyTasksToState
(
self
,
tasks
,
newState
):
if
Config
.
getConfig
().
ignore_errors
:
# if we are asked to ignore certain errors, let's not verify CreateDB success.
return
if
(
self
.
hasSuccess
(
tasks
,
TaskCreateDb
)
):
# at EMPTY, if there's success in creating DB
if
(
not
self
.
hasTask
(
tasks
,
TaskDropDb
)):
# and no drop_db tasks
...
...
@@ -995,16 +999,17 @@ class StateMechine:
dbc
.
execute
(
"show dnodes"
)
# Generic Checks, first based on the start state
if
self
.
_curState
.
canCreateDb
():
self
.
_curState
.
assertIfExistThenSuccess
(
tasks
,
TaskCreateDb
)
# self.assertAtMostOneSuccess(tasks, CreateDbTask) # not really, in
# case of multiple creation and drops
if
self
.
_curState
.
canDropDb
():
if
gSvcMgr
==
None
:
# only if we are running as client-only
self
.
_curState
.
assertIfExistThenSuccess
(
tasks
,
TaskDropDb
)
# self.assertAtMostOneSuccess(tasks, DropDbTask) # not really in
# case of drop-create-drop
if
not
Config
.
getConfig
().
ignore_errors
:
# verify state, only if we are asked not to ignore certain errors.
if
self
.
_curState
.
canCreateDb
():
self
.
_curState
.
assertIfExistThenSuccess
(
tasks
,
TaskCreateDb
)
# self.assertAtMostOneSuccess(tasks, CreateDbTask) # not really, in
# case of multiple creation and drops
if
self
.
_curState
.
canDropDb
():
if
gSvcMgr
==
None
:
# only if we are running as client-only
self
.
_curState
.
assertIfExistThenSuccess
(
tasks
,
TaskDropDb
)
# self.assertAtMostOneSuccess(tasks, DropDbTask) # not really in
# case of drop-create-drop
# if self._state.canCreateFixedTable():
# self.assertIfExistThenSuccess(tasks, CreateFixedTableTask) # Not true, DB may be dropped
...
...
@@ -1026,7 +1031,8 @@ class StateMechine:
newState
=
self
.
_findCurrentState
(
dbc
)
Logging
.
debug
(
"[STT] New DB state determined: {}"
.
format
(
newState
))
# can old state move to new state through the tasks?
self
.
_curState
.
verifyTasksToState
(
tasks
,
newState
)
if
not
Config
.
getConfig
().
ignore_errors
:
# verify state, only if we are asked not to ignore certain errors.
self
.
_curState
.
verifyTasksToState
(
tasks
,
newState
)
self
.
_curState
=
newState
def
pickTaskType
(
self
):
...
...
@@ -2231,16 +2237,14 @@ class TaskAddData(StateTransitionTask):
class
ThreadStacks
:
# stack info for all threads
def
__init__
(
self
):
self
.
_allStacks
=
{}
allFrames
=
sys
.
_current_frames
()
# All current stack frames
allFrames
=
sys
.
_current_frames
()
# All current stack frames
, keyed with "ident"
for
th
in
threading
.
enumerate
():
# For each thread
if
th
.
ident
is
None
:
continue
stack
=
traceback
.
extract_stack
(
allFrames
[
th
.
ident
])
# Get stack for a thread
shortTid
=
th
.
ident
%
10000
stack
=
traceback
.
extract_stack
(
allFrames
[
th
.
ident
])
#type: ignore # Get stack for a thread
shortTid
=
th
.
native_id
%
10000
#type: ignore
self
.
_allStacks
[
shortTid
]
=
stack
# Was using th.native_id
def
print
(
self
,
filteredEndName
=
None
,
filterInternal
=
False
):
for
tIdent
,
stack
in
self
.
_allStacks
.
items
():
# for each thread, stack frames top to bottom
for
shortTid
,
stack
in
self
.
_allStacks
.
items
():
# for each thread, stack frames top to bottom
lastFrame
=
stack
[
-
1
]
if
filteredEndName
:
# we need to filter out stacks that match this name
if
lastFrame
.
name
==
filteredEndName
:
# end did not match
...
...
@@ -2252,7 +2256,9 @@ class ThreadStacks: # stack info for all threads
'__init__'
]:
# the thread that extracted the stack
continue
# ignore
# Now print
print
(
"
\n
<----- Thread Info for LWP/ID: {} (most recent call last) <-----"
.
format
(
tIdent
))
print
(
"
\n
<----- Thread Info for LWP/ID: {} (most recent call last) <-----"
.
format
(
shortTid
))
lastSqlForThread
=
DbConn
.
fetchSqlForThread
(
shortTid
)
print
(
"Last SQL statement attempted from thread {} is: {}"
.
format
(
shortTid
,
lastSqlForThread
))
stackFrame
=
0
for
frame
in
stack
:
# was using: reversed(stack)
# print(frame)
...
...
tests/pytest/crash_gen/shared/db.py
浏览文件 @
6f3bf5b7
...
...
@@ -27,6 +27,26 @@ class DbConn:
TYPE_REST
=
"rest-api"
TYPE_INVALID
=
"invalid"
# class variables
lastSqlFromThreads
:
dict
[
int
,
str
]
=
{}
# stored by short thread id, obtained from threading.current_thread().native_id % 10000
@classmethod
def saveSqlForCurrentThread(cls, sql: str):
    '''
    Record `sql` as the most recent statement attempted by the calling thread.

    The statement is stored in the class-level `lastSqlFromThreads` dict, keyed
    by the calling thread's native id modulo 10000, and replaces any earlier
    entry under that key. The intent is that, when a dead-lock is being
    investigated later, the SQL statement a stuck thread was running can be
    looked up by that key.
    '''
    tidKey = threading.current_thread().native_id % 10000  # type: ignore
    cls.lastSqlFromThreads[tidKey] = sql  # only the latest statement is kept
@classmethod
def fetchSqlForThread(cls, shortTid: int) -> str:
    '''
    Return the last SQL statement recorded under the thread key `shortTid`.

    The lookup is done in the class-level `lastSqlFromThreads` dict.

    Raises:
        CrashGenError: if nothing has been recorded under `shortTid`.
    '''
    recorded = cls.lastSqlFromThreads
    if shortTid in recorded:
        return recorded[shortTid]
    raise CrashGenError("No last-attempted-SQL found for thread id: {}".format(shortTid))
@
classmethod
def
create
(
cls
,
connType
,
dbTarget
):
if
connType
==
cls
.
TYPE_NATIVE
:
...
...
@@ -163,6 +183,7 @@ class DbConnRest(DbConn):
def
_doSql
(
self
,
sql
):
self
.
_lastSql
=
sql
# remember this, last SQL attempted
self
.
saveSqlForCurrentThread
(
sql
)
# Save in global structure too. #TODO: combine with above
try
:
r
=
requests
.
post
(
self
.
_url
,
data
=
sql
,
...
...
@@ -392,6 +413,7 @@ class DbConnNative(DbConn):
"Cannot exec SQL unless db connection is open"
,
CrashGenError
.
DB_CONNECTION_NOT_OPEN
)
Logging
.
debug
(
"[SQL] Executing SQL: {}"
.
format
(
sql
))
self
.
_lastSql
=
sql
self
.
saveSqlForCurrentThread
(
sql
)
# Save in global structure too. #TODO: combine with above
nRows
=
self
.
_tdSql
.
execute
(
sql
)
cls
=
self
.
__class__
cls
.
totalRequests
+=
1
...
...
@@ -407,6 +429,7 @@ class DbConnNative(DbConn):
"Cannot query database until connection is open, restarting?"
,
CrashGenError
.
DB_CONNECTION_NOT_OPEN
)
Logging
.
debug
(
"[SQL] Executing SQL: {}"
.
format
(
sql
))
self
.
_lastSql
=
sql
self
.
saveSqlForCurrentThread
(
sql
)
# Save in global structure too. #TODO: combine with above
nRows
=
self
.
_tdSql
.
query
(
sql
)
cls
=
self
.
__class__
cls
.
totalRequests
+=
1
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录