Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
taosdata
TDengine
提交
d8206d22
T
TDengine
项目概览
taosdata
/
TDengine
大约 1 年 前同步成功
通知
1185
Star
22015
Fork
4786
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
TDengine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
d8206d22
编写于
7月 14, 2020
作者:
S
Steven Li
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refactoring thread synchronization for crash_gen tool
上级
a75721ff
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
108 addition
and
93 deletion
+108
-93
tests/pytest/crash_gen.py
tests/pytest/crash_gen.py
+108
-93
未找到文件。
tests/pytest/crash_gen.py
浏览文件 @
d8206d22
...
@@ -130,22 +130,15 @@ class WorkerThread:
...
@@ -130,22 +130,15 @@ class WorkerThread:
while
True
:
while
True
:
tc
=
self
.
_tc
# Thread Coordinator, the overall master
tc
=
self
.
_tc
# Thread Coordinator, the overall master
tc
.
crossStepBarrier
()
# shared barrier first, INCLUDING the last one
tc
.
crossStepBarrier
()
# shared barrier first, INCLUDING the last one
logger
.
debug
(
logger
.
debug
(
"[TRD] Worker thread [{}] exited barrier..."
.
format
(
self
.
_tid
))
"[TRD] Worker thread [{}] exited barrier..."
.
format
(
self
.
_tid
))
self
.
crossStepGate
()
# then per-thread gate, after being tapped
self
.
crossStepGate
()
# then per-thread gate, after being tapped
logger
.
debug
(
logger
.
debug
(
"[TRD] Worker thread [{}] exited step gate..."
.
format
(
self
.
_tid
))
"[TRD] Worker thread [{}] exited step gate..."
.
format
(
self
.
_tid
))
if
not
self
.
_tc
.
isRunning
():
if
not
self
.
_tc
.
isRunning
():
logger
.
debug
(
logger
.
debug
(
"[TRD] Thread Coordinator not running any more, worker thread now stopping..."
)
"[TRD] Thread Coordinator not running any more, worker thread now stopping..."
)
break
break
# Fetch a task from the Thread Coordinator
# Fetch a task from the Thread Coordinator
logger
.
debug
(
logger
.
debug
(
"[TRD] Worker thread [{}] about to fetch task"
.
format
(
self
.
_tid
))
"[TRD] Worker thread [{}] about to fetch task"
.
format
(
self
.
_tid
))
task
=
tc
.
fetchTask
()
task
=
tc
.
fetchTask
()
# Execute such a task
# Execute such a task
...
@@ -154,9 +147,7 @@ class WorkerThread:
...
@@ -154,9 +147,7 @@ class WorkerThread:
self
.
_tid
,
task
.
__class__
.
__name__
))
self
.
_tid
,
task
.
__class__
.
__name__
))
task
.
execute
(
self
)
task
.
execute
(
self
)
tc
.
saveExecutedTask
(
task
)
tc
.
saveExecutedTask
(
task
)
logger
.
debug
(
logger
.
debug
(
"[TRD] Worker thread [{}] finished executing task"
.
format
(
self
.
_tid
))
"[TRD] Worker thread [{}] finished executing task"
.
format
(
self
.
_tid
))
self
.
_dbInUse
=
False
# there may be changes between steps
self
.
_dbInUse
=
False
# there may be changes between steps
...
@@ -255,101 +246,124 @@ class ThreadCoordinator:
...
@@ -255,101 +246,124 @@ class ThreadCoordinator:
self
.
_runStatus
=
MainExec
.
STATUS_STOPPING
self
.
_runStatus
=
MainExec
.
STATUS_STOPPING
self
.
_execStats
.
registerFailure
(
"User Interruption"
)
self
.
_execStats
.
registerFailure
(
"User Interruption"
)
def
_runShouldEnd
(
self
,
transitionFailed
,
hasAbortedTask
):
maxSteps
=
gConfig
.
max_steps
# type: ignore
if
self
.
_curStep
>=
(
maxSteps
-
1
):
# maxStep==10, last curStep should be 9
return
True
if
self
.
_runStatus
!=
MainExec
.
STATUS_RUNNING
:
return
True
if
transitionFailed
:
return
True
if
hasAbortedTask
:
return
True
return
False
def
_hasAbortedTask
(
self
):
# from execution of previous step
for
task
in
self
.
_executedTasks
:
if
task
.
isAborted
():
# print("Task aborted: {}".format(task))
# hasAbortedTask = True
return
True
return
False
def
_releaseAllWorkerThreads
(
self
,
transitionFailed
):
self
.
_curStep
+=
1
# we are about to get into next step. TODO: race condition here!
# Now not all threads had time to go to sleep
logger
.
debug
(
"--
\r\n\n
--> Step {} starts with main thread waking up"
.
format
(
self
.
_curStep
))
# A new TE for the new step
self
.
_te
=
None
# set to empty first, to signal worker thread to stop
if
not
transitionFailed
:
# only if not failed
self
.
_te
=
TaskExecutor
(
self
.
_curStep
)
logger
.
debug
(
"[TRD] Main thread waking up at step {}, tapping worker threads"
.
format
(
self
.
_curStep
))
# Now not all threads had time to go to sleep
# Worker threads will wake up at this point, and each execute it's own task
self
.
tapAllThreads
()
# release all worker thread from their "gate"
def
_syncAtBarrier
(
self
):
# Now main thread (that's us) is ready to enter a step
# let other threads go past the pool barrier, but wait at the
# thread gate
logger
.
debug
(
"[TRD] Main thread about to cross the barrier"
)
self
.
crossStepBarrier
()
self
.
_stepBarrier
.
reset
()
# Other worker threads should now be at the "gate"
logger
.
debug
(
"[TRD] Main thread finished crossing the barrier"
)
def
_doTransition
(
self
):
transitionFailed
=
False
try
:
sm
=
self
.
_dbManager
.
getStateMachine
()
logger
.
debug
(
"[STT] starting transitions"
)
# at end of step, transiton the DB state
sm
.
transition
(
self
.
_executedTasks
)
logger
.
debug
(
"[STT] transition ended"
)
# Due to limitation (or maybe not) of the Python library,
# we cannot share connections across threads
if
sm
.
hasDatabase
():
for
t
in
self
.
_pool
.
threadList
:
logger
.
debug
(
"[DB] use db for all worker threads"
)
t
.
useDb
()
# t.execSql("use db") # main thread executing "use
# db" on behalf of every worker thread
except
taos
.
error
.
ProgrammingError
as
err
:
if
(
err
.
msg
==
'network unavailable'
):
# broken DB connection
logger
.
info
(
"DB connection broken, execution failed"
)
traceback
.
print_stack
()
transitionFailed
=
True
self
.
_te
=
None
# Not running any more
self
.
_execStats
.
registerFailure
(
"Broken DB Connection"
)
# continue # don't do that, need to tap all threads at
# end, and maybe signal them to stop
else
:
raise
self
.
resetExecutedTasks
()
# clear the tasks after we are done
# Get ready for next step
logger
.
debug
(
"<-- Step {} finished, trasition failed = {}"
.
format
(
self
.
_curStep
,
transitionFailed
))
return
transitionFailed
def
run
(
self
):
def
run
(
self
):
self
.
_pool
.
createAndStartThreads
(
self
)
self
.
_pool
.
createAndStartThreads
(
self
)
# Coordinate all threads step by step
# Coordinate all threads step by step
self
.
_curStep
=
-
1
# not started yet
self
.
_curStep
=
-
1
# not started yet
maxSteps
=
gConfig
.
max_steps
# type: ignore
self
.
_execStats
.
startExec
()
# start the stop watch
self
.
_execStats
.
startExec
()
# start the stop watch
transitionFailed
=
False
transitionFailed
=
False
hasAbortedTask
=
False
hasAbortedTask
=
False
while
(
self
.
_curStep
<
maxSteps
-
1
and
while
not
self
.
_runShouldEnd
(
transitionFailed
,
hasAbortedTask
):
(
not
transitionFailed
)
and
if
not
gConfig
.
debug
:
# print this only if we are not in debug mode
(
self
.
_runStatus
==
MainExec
.
STATUS_RUNNING
)
and
(
not
hasAbortedTask
)):
# maxStep==10, last curStep should be 9
if
not
gConfig
.
debug
:
# print this only if we are not in debug mode
print
(
"."
,
end
=
""
,
flush
=
True
)
print
(
"."
,
end
=
""
,
flush
=
True
)
logger
.
debug
(
"[TRD] Main thread going to sleep"
)
self
.
_syncAtBarrier
()
# For now just cross the barrier
# Now main thread (that's us) is ready to enter a step
# let other threads go past the pool barrier, but wait at the
# thread gate
self
.
crossStepBarrier
()
self
.
_stepBarrier
.
reset
()
# Other worker threads should now be at the "gate"
# At this point, all threads should be pass the overall "barrier" and before the per-thread "gate"
# At this point, all threads should be pass the overall "barrier" and before the per-thread "gate"
# We use this period to do house keeping work, when all worker
# We use this period to do house keeping work, when all worker
# threads are QUIET.
# threads are QUIET.
hasAbortedTask
=
False
hasAbortedTask
=
self
.
_hasAbortedTask
()
# from previous step
for
task
in
self
.
_executedTasks
:
if
hasAbortedTask
:
if
task
.
isAborted
():
logger
.
info
(
"Aborted task encountered, exiting test program"
)
print
(
"Task aborted: {}"
.
format
(
task
))
hasAbortedTask
=
True
break
if
hasAbortedTask
:
# do transition only if tasks are error free
self
.
_execStats
.
registerFailure
(
"Aborted Task Encountered"
)
self
.
_execStats
.
registerFailure
(
"Aborted Task Encountered"
)
else
:
break
# do transition only if tasks are error free
try
:
sm
=
self
.
_dbManager
.
getStateMachine
()
logger
.
debug
(
"[STT] starting transitions"
)
# at end of step, transiton the DB state
sm
.
transition
(
self
.
_executedTasks
)
logger
.
debug
(
"[STT] transition ended"
)
# Due to limitation (or maybe not) of the Python library,
# we cannot share connections across threads
if
sm
.
hasDatabase
():
for
t
in
self
.
_pool
.
threadList
:
logger
.
debug
(
"[DB] use db for all worker threads"
)
t
.
useDb
()
# t.execSql("use db") # main thread executing "use
# db" on behalf of every worker thread
except
taos
.
error
.
ProgrammingError
as
err
:
if
(
err
.
msg
==
'network unavailable'
):
# broken DB connection
logger
.
info
(
"DB connection broken, execution failed"
)
traceback
.
print_stack
()
transitionFailed
=
True
self
.
_te
=
None
# Not running any more
self
.
_execStats
.
registerFailure
(
"Broken DB Connection"
)
# continue # don't do that, need to tap all threads at
# end, and maybe signal them to stop
else
:
raise
# finally:
# pass
self
.
resetExecutedTasks
()
# clear the tasks after we are done
# Ending previous step
transitionFailed
=
self
.
_doTransition
()
# To start, we end step -1 first
# Then we move on to the next step
self
.
_releaseAllWorkerThreads
(
transitionFailed
)
# Get ready for next step
if
hasAbortedTask
or
transitionFailed
:
# abnormal ending, workers waiting at "gate"
logger
.
debug
(
"<-- Step {} finished"
.
format
(
self
.
_curStep
))
logger
.
debug
(
"Abnormal ending of main thraed"
)
self
.
_curStep
+=
1
# we are about to get into next step. TODO: race condition here!
else
:
# regular ending, workers waiting at "barrier"
# Now not all threads had time to go to sleep
logger
.
debug
(
"Regular ending, main thread waiting for all worker threads to stop..."
)
logger
.
debug
(
self
.
_syncAtBarrier
()
"
\r\n\n
--> Step {} starts with main thread waking up"
.
format
(
self
.
_curStep
))
# A new TE for the new step
self
.
_te
=
None
# No more executor, time to end
if
not
transitionFailed
:
# only if not failed
logger
.
debug
(
"Main thread tapping all threads one last time..."
)
self
.
_te
=
TaskExecutor
(
self
.
_curStep
)
self
.
tapAllThreads
()
# Let the threads run one last time
logger
.
debug
(
"[TRD] Main thread waking up at step {}, tapping worker threads"
.
format
(
self
.
_curStep
))
# Now not all threads had time to go to sleep
# Worker threads will wake up at this point, and each execute it's
# own task
self
.
tapAllThreads
()
logger
.
debug
(
"Main thread ready to finish up..."
)
if
not
transitionFailed
:
# only in regular situations
self
.
crossStepBarrier
()
# Cross it one last time, after all threads finish
self
.
_stepBarrier
.
reset
()
logger
.
debug
(
"Main thread in exclusive zone..."
)
self
.
_te
=
None
# No more executor, time to end
logger
.
debug
(
"Main thread tapping all threads one last time..."
)
self
.
tapAllThreads
()
# Let the threads run one last time
logger
.
debug
(
"
\r\n\n
--> Main thread ready to finish up..."
)
logger
.
debug
(
"Main thread joining all threads"
)
logger
.
debug
(
"Main thread joining all threads"
)
self
.
_pool
.
joinAll
()
# Get all threads to finish
self
.
_pool
.
joinAll
()
# Get all threads to finish
logger
.
info
(
"
\n
All worker threads finished"
)
logger
.
info
(
"
\n
All worker threads finished"
)
...
@@ -2258,8 +2272,9 @@ class ClientManager:
...
@@ -2258,8 +2272,9 @@ class ClientManager:
def
sigIntHandler
(
self
,
signalNumber
,
frame
):
def
sigIntHandler
(
self
,
signalNumber
,
frame
):
if
self
.
_status
!=
MainExec
.
STATUS_RUNNING
:
if
self
.
_status
!=
MainExec
.
STATUS_RUNNING
:
print
(
"Ignoring repeated SIGINT..."
)
print
(
"Repeated SIGINT received, forced exit..."
)
return
# do nothing if it's already not running
# return # do nothing if it's already not running
sys
.
exit
(
-
1
)
self
.
_status
=
MainExec
.
STATUS_STOPPING
# immediately set our status
self
.
_status
=
MainExec
.
STATUS_STOPPING
# immediately set our status
print
(
"Terminating program..."
)
print
(
"Terminating program..."
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录