Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
taosdata
TDengine
提交
cd76a295
T
TDengine
项目概览
taosdata
/
TDengine
1 年多 前同步成功
通知
1185
Star
22016
Fork
4786
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
TDengine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
cd76a295
编写于
5月 31, 2021
作者:
S
Steven Li
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Adjust table locking in crash_gen to expose same-connection consistency issues, supporting TD-4444
上级
7f3fab49
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
115 addition
and
41 deletion
+115
-41
src/connector/python/taos/__init__.py
src/connector/python/taos/__init__.py
+4
-0
tests/pytest/crash_gen/crash_gen_main.py
tests/pytest/crash_gen/crash_gen_main.py
+102
-37
tests/pytest/crash_gen/service_manager.py
tests/pytest/crash_gen/service_manager.py
+7
-3
tests/pytest/crash_gen/shared/misc.py
tests/pytest/crash_gen/shared/misc.py
+2
-1
未找到文件。
src/connector/python/taos/__init__.py
浏览文件 @
cd76a295
...
...
@@ -2,6 +2,10 @@
from
.connection
import
TDengineConnection
from
.cursor
import
TDengineCursor
# For some reason, the following is needed for VS Code (through PyLance) to
# recognize that "error" is a valid module of the "taos" package.
from
.error
import
ProgrammingError
# Globals
threadsafety
=
0
paramstyle
=
'pyformat'
...
...
tests/pytest/crash_gen/crash_gen_main.py
浏览文件 @
cd76a295
...
...
@@ -37,6 +37,7 @@ import requests
import
gc
import
taos
from
.shared.types
import
TdColumns
,
TdTags
# from crash_gen import ServiceManager, TdeInstance, TdeSubProcess
...
...
@@ -160,6 +161,7 @@ class WorkerThread:
Logging
.
debug
(
"[TRD] Thread Coordinator not running any more, worker thread now stopping..."
)
break
# Before we fetch the task and run it, let's ensure we properly "use" the database (not needed any more)
try
:
if
(
Config
.
getConfig
().
per_thread_db_connection
):
# most likely TRUE
...
...
@@ -1362,9 +1364,12 @@ class Task():
Progress
.
emit
(
Progress
.
ACCEPTABLE_ERROR
)
self
.
_err
=
err
else
:
# not an acceptable error
errMsg
=
"[=] Unexpected Taos library exception ({}): errno=0x{:X}, msg: {}, SQL: {}"
.
format
(
shortTid
=
threading
.
get_ident
()
%
10000
errMsg
=
"[=] Unexpected Taos library exception ({}): errno=0x{:X}, thread={}, msg: {}, SQL: {}"
.
format
(
self
.
__class__
.
__name__
,
errno2
,
err
,
wt
.
getDbConn
().
getLastSql
())
errno2
,
shortTid
,
err
,
wt
.
getDbConn
().
getLastSql
())
self
.
logDebug
(
errMsg
)
if
Config
.
getConfig
().
debug
:
# raise # so that we see full stack
...
...
@@ -1411,21 +1416,31 @@ class Task():
def
lockTable
(
self
,
ftName
):
# full table name
# print(" <<" + ftName + '_', end="", flush=True)
with
Task
.
_lock
:
if
not
ftName
in
Task
.
_tableLocks
:
with
Task
.
_lock
:
# SHORT lock! so we only protect lock creation
if
not
ftName
in
Task
.
_tableLocks
:
# Create new lock and add to list, if needed
Task
.
_tableLocks
[
ftName
]
=
threading
.
Lock
()
Task
.
_tableLocks
[
ftName
].
acquire
()
# No lock protection, anybody can do this any time
lock
=
Task
.
_tableLocks
[
ftName
]
# Logging.info("Acquiring lock: {}, {}".format(ftName, lock))
lock
.
acquire
()
# Logging.info("Acquiring lock successful: {}".format(lock))
def
unlockTable
(
self
,
ftName
):
# print('_' + ftName + ">> ", end="", flush=True)
with
Task
.
_lock
:
with
Task
.
_lock
:
if
not
ftName
in
self
.
_tableLocks
:
raise
RuntimeError
(
"Corrupt state, no such lock"
)
lock
=
Task
.
_tableLocks
[
ftName
]
if
not
lock
.
locked
():
raise
RuntimeError
(
"Corrupte state, already unlocked"
)
lock
.
release
()
# Important note, we want to protect unlocking under the task level
# locking, because we don't want the lock to be deleted (maybe in the future)
# while we unlock it
# Logging.info("Releasing lock: {}".format(lock))
lock
.
release
()
# Logging.info("Releasing lock successful: {}".format(lock))
class
ExecutionStats
:
...
...
@@ -1696,6 +1711,11 @@ class TdSuperTable:
return
dbc
.
query
(
"SELECT * FROM {}.{}"
.
format
(
self
.
_dbName
,
self
.
_stName
))
>
0
def
ensureRegTable
(
self
,
task
:
Optional
[
Task
],
dbc
:
DbConn
,
regTableName
:
str
):
'''
Make sure a regular table exists for this super table, creating it if necessary.
If there is an associated "Task" that wants to do this, "lock" this table so that
others don't access it while we create it.
'''
dbName
=
self
.
_dbName
sql
=
"select tbname from {}.{} where tbname in ('{}')"
.
format
(
dbName
,
self
.
_stName
,
regTableName
)
if
dbc
.
query
(
sql
)
>=
1
:
# reg table exists already
...
...
@@ -1703,18 +1723,24 @@ class TdSuperTable:
# acquire a lock first, so as to be able to *verify*. More details in TD-1471
fullTableName
=
dbName
+
'.'
+
regTableName
if
task
is
not
None
:
# TODO: what happens if we don't lock the table
task
.
lockTable
(
fullTableName
)
if
task
is
not
None
:
# Sometimes this operation is requested on behalf of a "task"
# Logging.info("Locking table for creation: {}".format(fullTableName))
task
.
lockTable
(
fullTableName
)
# in which case we'll lock this table to ensure serialized access
# Logging.info("Table locked for creation".format(fullTableName))
Progress
.
emit
(
Progress
.
CREATE_TABLE_ATTEMPT
)
# ATTEMPT to create a new table
# print("(" + fullTableName[-3:] + ")", end="", flush=True)
try
:
sql
=
"CREATE TABLE {} USING {}.{} tags ({})"
.
format
(
fullTableName
,
dbName
,
self
.
_stName
,
self
.
_getTagStrForSql
(
dbc
)
)
# Logging.info("Creating regular with SQL: {}".format(sql))
dbc
.
execute
(
sql
)
# Logging.info("Regular table created: {}".format(sql))
finally
:
if
task
is
not
None
:
# Logging.info("Unlocking table after creation: {}".format(fullTableName))
task
.
unlockTable
(
fullTableName
)
# no matter what
# Logging.info("Table unlocked after creation: {}".format(fullTableName))
def
_getTagStrForSql
(
self
,
dbc
)
:
tags
=
self
.
_getTags
(
dbc
)
...
...
@@ -2011,9 +2037,30 @@ class TaskAddData(StateTransitionTask):
def
canBeginFrom
(
cls
,
state
:
AnyState
):
return
state
.
canAddData
()
def
_lockTableIfNeeded
(
self
,
fullTableName
,
extraMsg
=
''
):
if
Config
.
getConfig
().
verify_data
:
# Logging.info("Locking table: {}".format(fullTableName))
self
.
lockTable
(
fullTableName
)
# Logging.info("Table locked {}: {}".format(extraMsg, fullTableName))
# print("_w" + str(nextInt % 100), end="", flush=True) # Trace what was written
else
:
# Logging.info("Skipping locking table")
pass
def
_unlockTableIfNeeded
(
self
,
fullTableName
):
if
Config
.
getConfig
().
verify_data
:
# Logging.info("Unlocking table: {}".format(fullTableName))
self
.
unlockTable
(
fullTableName
)
# Logging.info("Table unlocked: {}".format(fullTableName))
else
:
pass
# Logging.info("Skipping unlocking table")
def
_addDataInBatch
(
self
,
db
,
dbc
,
regTableName
,
te
:
TaskExecutor
):
numRecords
=
self
.
LARGE_NUMBER_OF_RECORDS
if
Config
.
getConfig
().
larger_data
else
self
.
SMALL_NUMBER_OF_RECORDS
fullTableName
=
db
.
getName
()
+
'.'
+
regTableName
self
.
_lockTableIfNeeded
(
fullTableName
,
'batch'
)
sql
=
"INSERT INTO {} VALUES "
.
format
(
fullTableName
)
for
j
in
range
(
numRecords
):
# number of records per table
...
...
@@ -2021,51 +2068,60 @@ class TaskAddData(StateTransitionTask):
nextTick
=
db
.
getNextTick
()
nextColor
=
db
.
getNextColor
()
sql
+=
"('{}', {}, '{}');"
.
format
(
nextTick
,
nextInt
,
nextColor
)
dbc
.
execute
(
sql
)
# Logging.info("Adding data in batch: {}".format(sql))
try
:
dbc
.
execute
(
sql
)
finally
:
# Logging.info("Data added in batch: {}".format(sql))
self
.
_unlockTableIfNeeded
(
fullTableName
)
def
_addData
(
self
,
db
:
Database
,
dbc
,
regTableName
,
te
:
TaskExecutor
):
# implied: NOT in batches
numRecords
=
self
.
LARGE_NUMBER_OF_RECORDS
if
Config
.
getConfig
().
larger_data
else
self
.
SMALL_NUMBER_OF_RECORDS
for
j
in
range
(
numRecords
):
# number of records per table
nextInt
=
db
.
getNextInt
()
intToWrite
=
db
.
getNextInt
()
nextTick
=
db
.
getNextTick
()
nextColor
=
db
.
getNextColor
()
if
Config
.
getConfig
().
record_ops
:
self
.
prepToRecordOps
()
if
self
.
fAddLogReady
is
None
:
raise
CrashGenError
(
"Unexpected empty fAddLogReady"
)
self
.
fAddLogReady
.
write
(
"Ready to write {} to {}
\n
"
.
format
(
nextInt
,
regTableName
))
self
.
fAddLogReady
.
write
(
"Ready to write {} to {}
\n
"
.
format
(
intToWrite
,
regTableName
))
self
.
fAddLogReady
.
flush
()
os
.
fsync
(
self
.
fAddLogReady
.
fileno
())
# TODO: too ugly trying to lock the table reliably, refactor...
fullTableName
=
db
.
getName
()
+
'.'
+
regTableName
if
Config
.
getConfig
().
verify_data
:
self
.
lockTable
(
fullTableName
)
# print("_w" + str(nextInt % 100), end="", flush=True) # Trace what was written
self
.
_lockTableIfNeeded
(
fullTableName
)
# so that we can verify read-back. TODO: deal with exceptions before unlock
try
:
sql
=
"INSERT INTO {} VALUES ('{}', {}, '{}');"
.
format
(
# removed: tags ('{}', {})
fullTableName
,
# ds.getFixedSuperTableName(),
# ds.getNextBinary(), ds.getNextFloat(),
nextTick
,
nextInt
,
nextColor
)
nextTick
,
intToWrite
,
nextColor
)
# Logging.info("Adding data: {}".format(sql))
dbc
.
execute
(
sql
)
# Logging.info("Data added: {}".format(sql))
intWrote
=
intToWrite
# Quick hack, attach an update statement here. TODO: create an "update" task
if
(
not
Config
.
getConfig
().
use_shadow_db
)
and
Dice
.
throw
(
5
)
==
0
:
# 1 in N chance, plus not using shadow DB
nextInt
=
db
.
getNextInt
()
intToUpdate
=
db
.
getNextInt
()
# Updated, but should not succeed
nextColor
=
db
.
getNextColor
()
sql
=
"INSERt INTO {} VALUES ('{}', {}, '{}');"
.
format
(
# "INSERt" means "update" here
fullTableName
,
nextTick
,
nextInt
,
nextColor
)
nextTick
,
intToUpdate
,
nextColor
)
# sql = "UPDATE {} set speed={}, color='{}' WHERE ts='{}'".format(
# fullTableName, db.getNextInt(), db.getNextColor(), nextTick)
dbc
.
execute
(
sql
)
intWrote
=
intToUpdate
# We updated, seems TDengine non-cluster accepts this.
except
:
# Any exception at all
if
Config
.
getConfig
().
verify_data
:
self
.
unlockTable
(
fullTableName
)
self
.
_unlockTableIfNeeded
(
fullTableName
)
raise
# Now read it back and verify, we might encounter an error if table is dropped
...
...
@@ -2073,33 +2129,41 @@ class TaskAddData(StateTransitionTask):
try
:
readBack
=
dbc
.
queryScalar
(
"SELECT speed from {}.{} WHERE ts='{}'"
.
format
(
db
.
getName
(),
regTableName
,
nextTick
))
if
readBack
!=
nextInt
:
if
readBack
!=
intWrote
:
raise
taos
.
error
.
ProgrammingError
(
"Failed to read back same data, wrote: {}, read: {}"
.
format
(
nextInt
,
readBack
),
0x999
)
.
format
(
intWrote
,
readBack
),
0x999
)
except
taos
.
error
.
ProgrammingError
as
err
:
errno
=
Helper
.
convertErrno
(
err
.
errno
)
if
errno
in
[
CrashGenError
.
INVALID_EMPTY_RESULT
,
CrashGenError
.
INVALID_MULTIPLE_RESULT
]
:
# not a single
result
if
errno
==
CrashGenError
.
INVALID_EMPTY_RESULT
:
# empty
result
raise
taos
.
error
.
ProgrammingError
(
"Failed to read back same data for tick: {}, wrote: {}, read: {}"
.
format
(
nextTick
,
nextInt
,
"Empty Result"
if
errno
==
CrashGenError
.
INVALID_EMPTY_RESULT
else
"Multiple Result"
),
"Failed to read back same data for tick: {}, wrote: {}, read: EMPTY"
.
format
(
nextTick
,
intWrote
),
errno
)
elif
errno
==
CrashGenError
.
INVALID_MULTIPLE_RESULT
:
# multiple results
raise
taos
.
error
.
ProgrammingError
(
"Failed to read back same data for tick: {}, wrote: {}, read: MULTIPLE RESULTS"
.
format
(
nextTick
,
intWrote
),
errno
)
elif
errno
in
[
0x218
,
0x362
]:
# table doesn't exist
# do nothing
dummy
=
0
pass
else
:
# Re-throw otherwise
raise
finally
:
self
.
unlockTable
(
fullTableName
)
# Unlock the table no matter what
self
.
_unlockTableIfNeeded
(
fullTableName
)
# Quite ugly, refactor lock/unlock
# Done with read-back verification, unlock the table now
else
:
self
.
_unlockTableIfNeeded
(
fullTableName
)
# Successfully wrote the data into the DB, let's record it somehow
te
.
recordDataMark
(
nextInt
)
te
.
recordDataMark
(
intWrote
)
if
Config
.
getConfig
().
record_ops
:
if
self
.
fAddLogDone
is
None
:
raise
CrashGenError
(
"Unexpected empty fAddLogDone"
)
self
.
fAddLogDone
.
write
(
"Wrote {} to {}
\n
"
.
format
(
nextInt
,
regTableName
))
self
.
fAddLogDone
.
write
(
"Wrote {} to {}
\n
"
.
format
(
intWrote
,
regTableName
))
self
.
fAddLogDone
.
flush
()
os
.
fsync
(
self
.
fAddLogDone
.
fileno
())
...
...
@@ -2137,15 +2201,16 @@ class TaskAddData(StateTransitionTask):
class
ThreadStacks
:
# stack info for all threads
def
__init__
(
self
):
self
.
_allStacks
=
{}
allFrames
=
sys
.
_current_frames
()
for
th
in
threading
.
enumerate
():
allFrames
=
sys
.
_current_frames
()
# All current stack frames
for
th
in
threading
.
enumerate
():
# For each thread
if
th
.
ident
is
None
:
continue
stack
=
traceback
.
extract_stack
(
allFrames
[
th
.
ident
])
self
.
_allStacks
[
th
.
native_id
]
=
stack
stack
=
traceback
.
extract_stack
(
allFrames
[
th
.
ident
])
# Get stack for a thread
shortTid
=
th
.
ident
%
10000
self
.
_allStacks
[
shortTid
]
=
stack
# Was using th.native_id
def
print
(
self
,
filteredEndName
=
None
,
filterInternal
=
False
):
for
t
hNid
,
stack
in
self
.
_allStacks
.
items
():
# for each thread, stack frames top to bottom
for
t
Ident
,
stack
in
self
.
_allStacks
.
items
():
# for each thread, stack frames top to bottom
lastFrame
=
stack
[
-
1
]
if
filteredEndName
:
# we need to filter out stacks that match this name
if
lastFrame
.
name
==
filteredEndName
:
# end did not match
...
...
@@ -2157,7 +2222,7 @@ class ThreadStacks: # stack info for all threads
'__init__'
]:
# the thread that extracted the stack
continue
# ignore
# Now print
print
(
"
\n
<----- Thread Info for LWP/ID: {} (most recent call last) <-----"
.
format
(
t
hNid
))
print
(
"
\n
<----- Thread Info for LWP/ID: {} (most recent call last) <-----"
.
format
(
t
Ident
))
stackFrame
=
0
for
frame
in
stack
:
# was using: reversed(stack)
# print(frame)
...
...
@@ -2376,7 +2441,7 @@ class MainExec:
action
=
'store'
,
default
=
0
,
type
=
int
,
help
=
'
Maximum number of DBs to keep
, set to disable dropping DB. (default: 0)'
)
help
=
'
Number of DBs to use
, set to disable dropping DB. (default: 0)'
)
parser
.
add_argument
(
'-c'
,
'--connector-type'
,
...
...
tests/pytest/crash_gen/service_manager.py
浏览文件 @
cd76a295
...
...
@@ -179,7 +179,7 @@ quorum 2
def
getServiceCmdLine
(
self
):
# to start the instance
if
Config
.
getConfig
().
track_memory_leaks
:
Logging
.
info
(
"Invoking VALGRIND on service..."
)
return
[
'exec
/usr/bin/
valgrind'
,
'--leak-check=yes'
,
self
.
getExecFile
(),
'-c'
,
self
.
getCfgDir
()]
return
[
'exec valgrind'
,
'--leak-check=yes'
,
self
.
getExecFile
(),
'-c'
,
self
.
getCfgDir
()]
else
:
# TODO: move "exec -c" into Popen(), we can both "use shell" and NOT fork so as not to lose kill control
return
[
"exec "
+
self
.
getExecFile
(),
'-c'
,
self
.
getCfgDir
()]
# used in subprocess.Popen()
...
...
@@ -310,7 +310,7 @@ class TdeSubProcess:
# print("Starting TDengine with env: ", myEnv.items())
print
(
"Starting TDengine: {}"
.
format
(
cmdLine
))
ret
urn
Popen
(
ret
=
Popen
(
' '
.
join
(
cmdLine
),
# ' '.join(cmdLine) if useShell else cmdLine,
shell
=
True
,
# Always use shell, since we need to pass ENV vars
stdout
=
PIPE
,
...
...
@@ -318,6 +318,10 @@ class TdeSubProcess:
close_fds
=
ON_POSIX
,
env
=
myEnv
)
# had text=True, which interfered with reading EOF
time
.
sleep
(
0.01
)
# very brief wait, then let's check if sub process started successfully.
if
ret
.
poll
():
raise
CrashGenError
(
"Sub process failed to start with command line: {}"
.
format
(
cmdLine
))
return
ret
STOP_SIGNAL
=
signal
.
SIGINT
# signal.SIGKILL/SIGINT # What signal to use (in kill) to stop a taosd process?
SIG_KILL_RETCODE
=
137
# ref: https://stackoverflow.com/questions/43268156/process-finished-with-exit-code-137-in-pycharm
...
...
@@ -614,7 +618,7 @@ class ServiceManager:
# Find if there's already a taosd service, and then kill it
for
proc
in
psutil
.
process_iter
():
if
proc
.
name
()
==
'taosd'
:
if
proc
.
name
()
==
'taosd'
or
proc
.
name
()
==
'memcheck-amd64-'
:
# Regular or under Valgrind
Logging
.
info
(
"Killing an existing TAOSD process in 2 seconds... press CTRL-C to interrupt"
)
time
.
sleep
(
2.0
)
proc
.
kill
()
...
...
tests/pytest/crash_gen/shared/misc.py
浏览文件 @
cd76a295
...
...
@@ -35,7 +35,8 @@ class LoggingFilter(logging.Filter):
class
MyLoggingAdapter
(
logging
.
LoggerAdapter
):
def
process
(
self
,
msg
,
kwargs
):
return
"[{:04d}] {}"
.
format
(
threading
.
get_ident
()
%
10000
,
msg
),
kwargs
shortTid
=
threading
.
get_ident
()
%
10000
return
"[{:04d}] {}"
.
format
(
shortTid
,
msg
),
kwargs
# return '[%s] %s' % (self.extra['connid'], msg), kwargs
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录