Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
taosdata
TDengine
提交
a966bb11
TDengine
项目概览
taosdata
/
TDengine
1 年多 前同步成功
通知
1185
Star
22016
Fork
4786
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
TDengine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
a966bb11
编写于
8月 27, 2021
作者:
J
jiajingbin
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add tools/schemalessInsertPerformance.py
上级
cf1d1cf9
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
145 addition
and
181 deletion
+145
-181
tests/pytest/tools/schemalessInsertPerformance.py
tests/pytest/tools/schemalessInsertPerformance.py
+145
-181
未找到文件。
tests/pytest/tools/schemalessInsertPerformance.py
浏览文件 @
a966bb11
...
...
@@ -11,11 +11,8 @@
# -*- coding: utf-8 -*-
import
traceback
import
random
import
string
from
taos.error
import
LinesError
import
datetime
import
time
from
copy
import
deepcopy
import
numpy
as
np
...
...
@@ -24,34 +21,21 @@ from util.cases import *
from
util.sql
import
*
from
util.common
import
tdCom
import
threading
import
itertools
class
TDTestCase
:
def
init
(
self
,
conn
,
logSql
):
tdLog
.
debug
(
"start to execute %s"
%
__file__
)
tdSql
.
init
(
conn
.
cursor
(),
logSql
)
self
.
_conn
=
conn
def
genRandomTs
(
self
):
year
=
random
.
randint
(
2000
,
2021
)
month
=
random
.
randint
(
10
,
12
)
day
=
random
.
randint
(
10
,
29
)
hour
=
random
.
randint
(
10
,
24
)
minute
=
random
.
randint
(
10
,
59
)
second
=
random
.
randint
(
10
,
59
)
m_second
=
random
.
randint
(
101
,
199
)
date_time
=
f
'
{
year
}
-
{
month
}
-
{
day
}
{
hour
}
:
{
minute
}
:
{
second
}
'
print
(
date_time
)
timeArray
=
time
.
strptime
(
date_time
,
"%Y-%m-%d %H:%M:%S"
)
ts
=
int
(
time
.
mktime
(
timeArray
))
print
(
"------"
,
ts
)
# timestamp = time.mktime(datetime.datetime.strptime(date_time, "%Y-%m-%d %H:%M:%S.%f").timetuple())
return
f
'
{
ts
}
s'
self
.
lock
=
threading
.
Lock
()
def
genMultiColStr
(
self
,
int_count
=
4
,
double_count
=
0
,
binary_count
=
0
):
"""
genType must be tag/col
"""
'''
related to self.getPerfSql()
:count = 4 ---> 4 int
:count = 1000 ---> 400 int 400 double 200 binary(128)
:count = 4000 ---> 1900 int 1900 double 200 binary(128)
'''
col_str
=
""
if
double_count
==
0
and
binary_count
==
0
:
for
i
in
range
(
0
,
int_count
):
...
...
@@ -88,15 +72,23 @@ class TDTestCase:
return
col_str
def
genLongSql
(
self
,
int_count
=
4
,
double_count
=
0
,
binary_count
=
0
,
init
=
False
):
'''
:init ---> stb insert line
'''
if
init
:
tag_str
=
f
'id="init",t0=
{
random
.
randint
(
0
,
65535
)
}
i32,t1=
\"
{
tdCom
.
getLongName
(
10
,
"letters"
)
}
\"
'
else
:
tag_str
=
f
'id="sub_
{
tdCom
.
getLongName
(
5
,
"letters"
)
}
_
{
tdCom
.
getLongName
(
5
,
"letters"
)
}
",t0=
{
random
.
randint
(
0
,
65535
)
}
i32,t1=
\"
{
tdCom
.
getLongName
(
10
,
"letters"
)
}
\"
'
col_str
=
self
.
genMultiColStr
(
int_count
,
double_count
,
binary_count
)
col_str
=
self
.
genMultiColStr
(
int_count
=
int_count
,
double_count
=
double_count
,
binary_count
=
binary_count
)
long_sql
=
'stb'
+
','
+
tag_str
+
' '
+
col_str
+
'0'
return
long_sql
def
getPerfSql
(
self
,
count
=
4
,
init
=
False
):
'''
:count = 4 ---> 4 int
:count = 1000 ---> 400 int 400 double 200 binary(128)
:count = 4000 ---> 1900 int 1900 double 200 binary(128)
'''
if
count
==
4
:
input_sql
=
self
.
genLongSql
(
init
=
init
)
elif
count
==
1000
:
...
...
@@ -105,193 +97,165 @@ class TDTestCase:
input_sql
=
self
.
genLongSql
(
1900
,
1900
,
200
,
init
=
init
)
return
input_sql
def
tableGenerator
(
self
,
count
=
4
,
table_count
=
1000
):
for
i
in
range
(
table_count
):
yield
self
.
getPerfSql
(
count
)
def
genTableList
(
self
,
count
=
4
,
table_count
=
10000
):
table_list
=
list
()
for
i
in
range
(
1
,
table_count
+
1
):
table_list
.
append
(
self
.
getPerfSql
(
count
))
return
table_list
def
splitTableList
(
self
,
count
=
4
,
thread_count
=
10
,
table_count
=
1000
):
per_list_len
=
int
(
table_count
/
thread_count
)
table_list
=
self
.
genTableList
(
count
=
count
)
# ts = int(time.time())
list_of_group
=
zip
(
*
(
iter
(
table_list
),)
*
per_list_len
)
end_list
=
[
list
(
i
)
for
i
in
list_of_group
]
# i is a tuple
count
=
len
(
table_list
)
%
per_list_len
end_list
.
append
(
table_list
[
-
count
:])
if
count
!=
0
else
end_list
return
table_list
,
end_list
def
rowsGenerator
(
self
,
end_list
):
ts
=
int
(
time
.
time
())
input_sql_list
=
list
()
for
elm_list
in
end_list
:
for
elm
in
elm_list
:
for
i
in
range
(
1
,
10000
):
ts
-=
1
elm_new
=
self
.
replaceLastStr
(
elm
,
str
(
ts
))
+
's'
input_sql_list
.
append
(
elm_new
)
yield
input_sql_list
# def insertRows(self, count=4, thread_count=10):
# table_list = self.splitTableList(count=count, thread_count=thread_count)[0]
# for
def
replaceLastStr
(
self
,
str
,
new
):
'''
replace last element of str to new element
'''
list_ori
=
list
(
str
)
list_ori
[
-
1
]
=
new
return
''
.
join
(
list_ori
)
def
genDataList
(
self
,
table_list
,
row_count
=
10
):
data_list
=
list
()
ts
=
int
(
time
.
time
())
for
table_str
in
table_list
:
for
i
in
range
(
1
,
row_count
+
1
):
ts
-=
1
table_str_new
=
self
.
replaceLastStr
(
table_str
,
f
'
{
str
(
ts
)
}
s'
)
data_list
.
append
(
table_str_new
)
print
(
data_list
)
return
data_list
def
createStb
(
self
,
count
=
4
):
'''
create 1 stb
'''
input_sql
=
self
.
getPerfSql
(
count
=
count
,
init
=
True
)
self
.
_conn
.
insert_lines
([
input_sql
])
def
insertRows
(
self
,
count
=
4
,
table_count
=
1000
):
table_generator
=
self
.
tableGenerator
(
count
=
count
,
table_count
=
table_count
)
for
table_name
in
table_generator
:
pass
def
perfTableInsert
(
self
):
table_generator
=
self
.
tableGenerator
()
for
input_sql
in
table_generator
:
self
.
_conn
.
insert_lines
([
input_sql
])
# for i in range(10):
# self._conn.insert_lines([input_sql])
def
perfDataInsert
(
self
,
count
=
4
):
table_generator
=
self
.
tableGenerator
(
count
=
count
)
ts
=
int
(
time
.
time
())
for
input_sql
in
table_generator
:
print
(
"input_sql-----------"
,
input_sql
)
self
.
_conn
.
insert_lines
([
input_sql
])
for
i
in
range
(
100000
):
ts
-=
1
input_sql_new
=
self
.
replaceLastStr
(
input_sql
,
str
(
ts
))
+
's'
print
(
"input_sql_new---------"
,
input_sql_new
)
self
.
_conn
.
insert_lines
([
input_sql_new
])
def
batchCreateTable
(
self
,
batch_list
):
'''
schemaless insert api
'''
self
.
_conn
.
insert_lines
(
batch_list
)
# print(threading.current_thread().name, "length=", len(batch_list))
# print(threading.current_thread().name, 'firstline', batch_list[0])
# print(threading.current_thread().name, 'lastline:', batch_list[-1])
# print(threading.current_thread().name, 'end')
def
splitGenerator
(
self
,
table_list
,
sub_list_len
):
'''
split a list to n piece of sub_list
[a, b, c, d] ---> [[a, b], [c, d]]
yield type ---> generator
'''
for
i
in
range
(
0
,
len
(
table_list
),
sub_list_len
):
yield
table_list
[
i
:
i
+
sub_list_len
]
def
genTbListGenerator
(
self
,
table_list
,
sub_list_len
):
'''
split table_list, after split, every sub_list len is sub_list_len
'''
table_list_generator
=
self
.
splitGenerator
(
table_list
,
sub_list_len
)
return
table_list_generator
def
batchInsertTable
(
self
,
batch_list
):
for
insert_list
in
batch_list
:
print
(
threading
.
current_thread
().
name
,
"length="
,
len
(
insert_list
))
print
(
threading
.
current_thread
().
name
,
'firstline'
,
insert_list
[
0
])
print
(
threading
.
current_thread
().
name
,
'lastline:'
,
insert_list
[
-
1
])
self
.
_conn
.
insert_lines
(
insert_list
)
print
(
threading
.
current_thread
().
name
,
'end'
)
def
genTableList
(
self
,
count
=
4
,
table_count
=
10000
):
'''
gen len(table_count) table_list
'''
table_list
=
list
()
for
i
in
range
(
table_count
):
table_list
.
append
(
self
.
getPerfSql
(
count
=
count
))
return
table_list
def
genTableThread
(
self
,
thread_count
=
10
):
def
threadCreateTables
(
self
,
table_list_generator
,
thread_count
=
10
):
'''
thread create tables
'''
threads
=
list
()
for
i
in
range
(
thread_count
):
t
=
threading
.
Thread
(
target
=
self
.
perfTableInsert
)
t
=
threading
.
Thread
(
target
=
self
.
batchCreateTable
,
args
=
(
next
(
table_list_generator
),)
)
threads
.
append
(
t
)
return
threads
def
genMultiThread
(
self
,
count
,
thread_count
=
10
):
def
batchInsertRows
(
self
,
table_list
,
rows_count
):
'''
add rows in each table ---> count=rows_count
'''
for
input_sql
in
table_list
:
ts
=
int
(
time
.
time
())
input_sql_list
=
list
()
for
i
in
range
(
rows_count
-
1
):
ts
-=
1
elm_new
=
self
.
replaceLastStr
(
input_sql
,
str
(
ts
))
+
's'
input_sql_list
.
append
(
elm_new
)
self
.
batchCreateTable
(
input_sql_list
)
def
threadsInsertRows
(
self
,
rows_generator
,
rows_count
=
1000
,
thread_count
=
10
):
'''
multi insert rows in each table
'''
threads
=
list
()
for
i
in
range
(
thread_count
):
t
=
threading
.
Thread
(
target
=
self
.
perfDataInsert
,
args
=
(
count
,))
self
.
lock
.
acquire
()
t
=
threading
.
Thread
(
target
=
self
.
batchInsertRows
,
args
=
(
next
(
rows_generator
),
rows_count
,))
threads
.
append
(
t
)
self
.
lock
.
release
()
return
threads
def
multiThreadRun
(
self
,
threads
):
'''
multi run threads
'''
for
t
in
threads
:
t
.
start
()
for
t
in
threads
:
t
.
join
()
def
createStb
(
self
,
count
=
4
):
input_sql
=
self
.
getPerfSql
(
count
=
count
,
init
=
True
)
self
.
_conn
.
insert_lines
([
input_sql
])
def
threadInsertTable
(
self
,
end_list
,
thread_count
=
10
):
threads
=
list
()
for
i
in
range
(
thread_count
):
t
=
threading
.
Thread
(
target
=
self
.
batchInsertTable
,
args
=
(
end_list
,))
threads
.
append
(
t
)
return
threads
def
finalRun
(
self
):
def
createTables
(
self
,
count
,
table_count
=
10000
,
sub_list_len
=
1000
,
thread_count
=
10
):
'''
create stb and tb
'''
table_list
=
self
.
genTableList
(
count
=
count
,
table_count
=
table_count
)
create_tables_start_time
=
time
.
time
()
self
.
createStb
()
table_list
,
end_list
=
self
.
splitTableList
()
batchInsertTableThread
=
self
.
threadInsertTable
(
end_list
=
end_list
)
self
.
multiThreadRun
(
batchInsertTableThread
)
# print(end_list)
# def createTb(self, count=4):
# input_sql = self.getPerfSql(count=count)
# for i in range(10000):
# self._conn.insert_lines([input_sql])
# def createTb1(self, count=4):
# start_time = time.time()
# self.multiThreadRun(self.genMultiThread(input_sql))
# end_time = time.time()
# return end_time - start_time
table_list_generator
=
self
.
genTbListGenerator
(
table_list
,
sub_list_len
)
create_tables_generator
,
insert_rows_generator
=
itertools
.
tee
(
table_list_generator
,
2
)
self
.
multiThreadRun
(
self
.
threadCreateTables
(
table_list_generator
=
create_tables_generator
,
thread_count
=
thread_count
))
create_tables_end_time
=
time
.
time
()
create_tables_time
=
int
(
create_tables_end_time
-
create_tables_start_time
)
return_str
=
f
'create tables
\'
time of
{
count
}
columns --->
{
create_tables_time
}
s'
return
insert_rows_generator
,
create_tables_time
,
return_str
def
insertRows
(
self
,
count
,
rows_generator
,
rows_count
=
1000
,
thread_count
=
10
):
'''
insert rows
'''
insert_rows_start_time
=
time
.
time
()
self
.
multiThreadRun
(
self
.
threadsInsertRows
(
rows_generator
=
rows_generator
,
rows_count
=
rows_count
,
thread_count
=
thread_count
))
insert_rows_end_time
=
time
.
time
()
insert_rows_time
=
int
(
insert_rows_end_time
-
insert_rows_start_time
)
return_str
=
f
'insert rows
\'
time of
{
count
}
columns --->
{
insert_rows_time
}
s'
return
insert_rows_time
,
return_str
def
schemalessPerfTest
(
self
,
count
,
table_count
=
10000
,
sub_list_len
=
1000
,
thread_count
=
10
):
'''
get performance
'''
insert_rows_generator
=
self
.
createTables
(
count
=
count
,
table_count
=
table_count
,
sub_list_len
=
sub_list_len
,
thread_count
=
thread_count
)[
0
]
return
self
.
insertRows
(
count
=
count
,
rows_generator
=
insert_rows_generator
,
rows_count
=
1000
,
thread_count
=
10
)
def
getPerfResults
(
self
,
test_times
=
3
,
table_count
=
10000
,
sub_list_len
=
1000
,
thread_count
=
10
):
col4_time
=
0
col1000_time
=
0
col4000_time
=
0
# for i in range(test_times):
# time_used = self.schemalessPerfTest(count=4, table_count=table_count, sub_list_len=sub_list_len, thread_count=thread_count)[0]
# col4_time += time_used
# col4_time /= test_times
# print(col4_time)
# tdCom.cleanTb()
for
i
in
range
(
test_times
):
time_used
=
self
.
schemalessPerfTest
(
count
=
1000
,
table_count
=
table_count
,
sub_list_len
=
sub_list_len
,
thread_count
=
thread_count
)[
0
]
col1000_time
+=
time_used
col1000_time
/=
test_times
print
(
col1000_time
)
# def calInsertTableTime(self):
# start_time = time.time()
# self.createStb()
# self.multiThreadRun(self.genMultiThread())
# end_time = time.time()
# return end_time - start_time
def
calRunTime
(
self
,
count
=
4
):
start_time
=
time
.
time
()
self
.
createStb
()
self
.
multiThreadRun
(
self
.
genMultiThread
(
count
=
count
))
end_time
=
time
.
time
()
return
end_time
-
start_time
def
calRunTime1
(
self
,
count
=
4
):
start_time
=
time
.
time
()
self
.
createStb
()
self
.
multiThreadRun
(
self
.
perfTableInsert
())
# self.perfTableInsert()
# def schemalessInsertPerfTest(self, count=4):
# input_sql = self.getPerfSql(count)
# self.calRunTime(input_sql)
tdCom
.
cleanTb
()
for
i
in
range
(
test_times
):
time_used
=
self
.
schemalessPerfTest
(
count
=
4000
,
table_count
=
table_count
,
sub_list_len
=
sub_list_len
,
thread_count
=
thread_count
)[
0
]
col4000_time
+=
time_used
col4000_time
/=
test_times
print
(
col4000_time
)
# def test(self):
# sql1 = 'stb,id="init",t0=14865i32,t1="tvnqbjuqck" c0=37i32,c1=217i32,c2=3i32,c3=88i32 1626006833640ms'
# sql2 = 'stb,id="init",t0=14865i32,t1="tvnqbjuqck" c0=38i32,c1=217i32,c2=3i32,c3=88i32 1626006833641ms'
# self._conn.insert_lines([sql1])
# self._conn.insert_lines([sql2])
return
col4_time
,
col1000_time
,
col4000_time
def
run
(
self
):
print
(
"running {}"
.
format
(
__file__
))
tdSql
.
prepare
()
self
.
finalRun
()
# print(self.calRunTime1(count=4))
# print(self.calRunTime(count=4))
# print(self.genRandomTs())
# self.calInsertTableTime()
# self.test()
# table_list = self.splitTableList()[0]
# data_list = self.genDataList(table_list)
# print(len(data_list))
# end_list = [['stb,id="sub_vzvfx_dbuxp",t0=9961i32,t1="zjjfayhfep" c0=83i32,c1=169i32,c2=177i32,c3=4i32 0','stb,id="sub_vzvfx_dbuxp",t0=9961i32,t1="zjjfayhfep" c0=83i32,c1=169i32,c2=177i32,c3=4i32 0'], ['stb,id="sub_vzvfx_dbuxp",t0=9961i32,t1="zjjfayhfep" c0=83i32,c1=169i32,c2=177i32,c3=4i32 0','stb,id="sub_vzvfx_dbuxp",t0=9961i32,t1="zjjfayhfep" c0=83i32,c1=169i32,c2=177i32,c3=4i32 0']]
# rowsGenerator = self.rowsGenerator(end_list)
# for i in rowsGenerator:
# print(i)
result
=
self
.
getPerfResults
(
test_times
=
3
,
table_count
=
1000
,
sub_list_len
=
100
,
thread_count
=
10
)
print
(
result
)
def
stop
(
self
):
tdSql
.
close
()
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录