Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
taosdata
TDengine
提交
c380e8f2
T
TDengine
项目概览
taosdata
/
TDengine
1 年多 前同步成功
通知
1185
Star
22017
Fork
4786
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
TDengine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
c380e8f2
编写于
10月 28, 2020
作者:
S
Shengliang Guan
提交者:
GitHub
10月 28, 2020
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #4028 from taosdata/feature/crash_gen
Enhanced crash_gen tool
上级
6d690442
eceae104
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
58 addition
and
12 deletion
+58
-12
tests/pytest/crash_gen.sh
tests/pytest/crash_gen.sh
+3
-1
tests/pytest/crash_gen/crash_gen.py
tests/pytest/crash_gen/crash_gen.py
+36
-7
tests/pytest/crash_gen/misc.py
tests/pytest/crash_gen/misc.py
+7
-1
tests/pytest/crash_gen/service_manager.py
tests/pytest/crash_gen/service_manager.py
+12
-3
未找到文件。
tests/pytest/crash_gen.sh
浏览文件 @
c380e8f2
...
...
@@ -70,10 +70,12 @@ if [[ $1 == '--valgrind' ]]; then
$CRASH_GEN_EXEC
$@
>
$VALGRIND_OUT
2>
$VALGRIND_ERR
elif
[[
$1
==
'--helgrind'
]]
;
then
shift
HELGRIND_OUT
=
helgrind.out
HELGRIND_ERR
=
helgrind.err
valgrind
\
--tool
=
helgrind
\
$PYTHON_EXEC
\
$CRASH_GEN_EXEC
$@
$CRASH_GEN_EXEC
$@
>
$HELGRIND_OUT
2>
$HELGRIND_ERR
else
$PYTHON_EXEC
$CRASH_GEN_EXEC
$@
fi
...
...
tests/pytest/crash_gen/crash_gen.py
浏览文件 @
c380e8f2
...
...
@@ -1226,6 +1226,11 @@ class Task():
"To be implemeted by child classes, class name: {}"
.
format
(
self
.
__class__
.
__name__
))
def
_isServiceStable
(
self
):
if
not
gSvcMgr
:
return
True
# we don't run service, so let's assume it's stable
return
gSvcMgr
.
isStable
()
# otherwise let's examine the service
def
_isErrAcceptable
(
self
,
errno
,
msg
):
if
errno
in
[
0x05
,
# TSDB_CODE_RPC_NOT_READY
...
...
@@ -1263,7 +1268,7 @@ class Task():
return
True
elif
msg
.
find
(
"duplicated column names"
)
!=
-
1
:
# also alter table tag issues
return
True
elif
gSvcMgr
and
(
not
gSvcMgr
.
isStable
()
):
# We are managing service, and ...
elif
not
self
.
_isServiceStable
(
):
# We are managing service, and ...
Logging
.
info
(
"Ignoring error when service starting/stopping: errno = {}, msg = {}"
.
format
(
errno
,
msg
))
return
True
...
...
@@ -1641,15 +1646,39 @@ class TaskReadData(StateTransitionTask):
def
canBeginFrom
(
cls
,
state
:
AnyState
):
return
state
.
canReadData
()
# def _canRestartService(self):
# if not gSvcMgr:
# return True # always
# return gSvcMgr.isActive() # only if it's running TODO: race condition here
def
_executeInternal
(
self
,
te
:
TaskExecutor
,
wt
:
WorkerThread
):
sTable
=
self
.
_db
.
getFixedSuperTable
()
# 1 in 5 chance, simulate a broken connection.
if
random
.
randrange
(
5
)
==
0
:
# TODO: break connection in all situations
wt
.
getDbConn
().
close
()
wt
.
getDbConn
().
open
()
print
(
"_r"
,
end
=
""
,
flush
=
True
)
# 1 in 20 chance, simulate a broken connection, only if service stable (not restarting)
if
random
.
randrange
(
20
)
==
0
:
# and self._canRestartService(): # TODO: break connection in all situations
# Logging.info("Attempting to reconnect to server") # TODO: change to DEBUG
Progress
.
emit
(
Progress
.
SERVICE_RECONNECT_START
)
try
:
wt
.
getDbConn
().
close
()
wt
.
getDbConn
().
open
()
except
ConnectionError
as
err
:
# may fail
if
not
gSvcMgr
:
Logging
.
error
(
"Failed to reconnect in client-only mode"
)
raise
# Not OK if we are running in client-only mode
if
gSvcMgr
.
isRunning
():
# may have race condition, but low prob, due to
Logging
.
error
(
"Failed to reconnect when managed server is running"
)
raise
# Not OK if we are running normally
Progress
.
emit
(
Progress
.
SERVICE_RECONNECT_FAILURE
)
# Logging.info("Ignoring DB reconnect error")
# print("_r", end="", flush=True)
Progress
.
emit
(
Progress
.
SERVICE_RECONNECT_SUCCESS
)
# The above might have taken a lot of time, service might be running
# by now, causing error below to be incorrectly handled due to timing issue
return
# TODO: fix server restart status race condition
dbc
=
wt
.
getDbConn
()
dbName
=
self
.
_db
.
getName
()
for
rTbName
in
sTable
.
getRegTables
(
dbc
,
dbName
):
# regular tables
...
...
tests/pytest/crash_gen/misc.py
浏览文件 @
c380e8f2
...
...
@@ -163,11 +163,17 @@ class Progress:
BEGIN_THREAD_STEP
=
1
END_THREAD_STEP
=
2
SERVICE_HEART_BEAT
=
3
SERVICE_RECONNECT_START
=
4
SERVICE_RECONNECT_SUCCESS
=
5
SERVICE_RECONNECT_FAILURE
=
6
tokens
=
{
STEP_BOUNDARY
:
'.'
,
BEGIN_THREAD_STEP
:
'['
,
END_THREAD_STEP
:
'] '
,
SERVICE_HEART_BEAT
:
'.Y.'
SERVICE_HEART_BEAT
:
'.Y.'
,
SERVICE_RECONNECT_START
:
'<r.'
,
SERVICE_RECONNECT_SUCCESS
:
'.r>'
,
SERVICE_RECONNECT_FAILURE
:
'.xr>'
,
}
@
classmethod
...
...
tests/pytest/crash_gen/service_manager.py
浏览文件 @
c380e8f2
...
...
@@ -280,16 +280,18 @@ class TdeSubProcess:
# process still alive, let's interrupt it
print
(
"Terminate running process, send SIG_INT and wait..."
)
# sub process should end, then IPC queue should end, causing IO thread to end
self
.
subProcess
.
send_signal
(
signal
.
SIGINT
)
# sig = signal.SIGINT
sig
=
signal
.
SIGKILL
self
.
subProcess
.
send_signal
(
sig
)
# SIGINT or SIGKILL
self
.
subProcess
.
wait
(
20
)
retCode
=
self
.
subProcess
.
returncode
# should always be there
# May throw subprocess.TimeoutExpired exception above, therefore
# The process is guaranteed to have ended by now
self
.
subProcess
=
None
if
retCode
!=
0
:
# != (- signal.SIGINT):
Logging
.
error
(
"TSP.stop(): Failed to stop sub proc properly w/ SIG
_INT, retCode={}"
.
format
(
retCode
))
Logging
.
error
(
"TSP.stop(): Failed to stop sub proc properly w/ SIG
{}, retCode={}"
.
format
(
sig
,
retCode
))
else
:
Logging
.
info
(
"TSP.stop(): sub proc successfully terminated with SIG
_INT"
)
Logging
.
info
(
"TSP.stop(): sub proc successfully terminated with SIG
{}"
.
format
(
sig
)
)
return
-
retCode
class
ServiceManager
:
...
...
@@ -395,6 +397,13 @@ class ServiceManager:
return
True
return
False
def
isRunning
(
self
):
for
ti
in
self
.
_tInsts
:
if
not
ti
.
getStatus
().
isRunning
():
return
False
return
True
# def isRestarting(self):
# """
# Determine if the service/cluster is being "restarted", i.e., at least
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录