提交 0d2d3349 编写于 作者: X Xiaoyu Wang

Merge remote-tracking branch 'origin/main' into fix/main_bugfix_wxy

......@@ -2,7 +2,7 @@
# taosadapter
ExternalProject_Add(taosadapter
GIT_REPOSITORY https://github.com/taosdata/taosadapter.git
GIT_TAG 213f8b3
GIT_TAG 3e08996
SOURCE_DIR "${TD_SOURCE_DIR}/tools/taosadapter"
BINARY_DIR ""
#BUILD_IN_SOURCE TRUE
......
......@@ -2,7 +2,7 @@
# taos-tools
ExternalProject_Add(taos-tools
GIT_REPOSITORY https://github.com/taosdata/taos-tools.git
GIT_TAG 7d24ed5
GIT_TAG 0cd564a
SOURCE_DIR "${TD_SOURCE_DIR}/tools/taos-tools"
BINARY_DIR ""
#BUILD_IN_SOURCE TRUE
......
module goexample
go 1.17
require github.com/taosdata/driver-go/v3 3.0
import pandas
from sqlalchemy import create_engine
from sqlalchemy import create_engine, text
engine = create_engine("taos://root:taosdata@localhost:6030/power")
df = pandas.read_sql("SELECT * FROM meters", engine)
conn = engine.connect()
df = pandas.read_sql(text("SELECT * FROM power.meters"), conn)
conn.close()
# print index
print(df.index)
......
import pandas
from sqlalchemy import create_engine
from sqlalchemy import create_engine, text
engine = create_engine("taosrest://root:taosdata@localhost:6041")
df: pandas.DataFrame = pandas.read_sql("SELECT * FROM power.meters", engine)
conn = engine.connect()
df: pandas.DataFrame = pandas.read_sql(text("SELECT * FROM power.meters"), conn)
conn.close()
# print index
print(df.index)
......
# ANCHOR: connect
from taosrest import connect, TaosRestConnection, TaosRestCursor
conn: TaosRestConnection = connect(url="http://localhost:6041",
user="root",
password="taosdata",
timeout=30)
conn = connect(url="http://localhost:6041",
user="root",
password="taosdata",
timeout=30)
# ANCHOR_END: connect
# ANCHOR: basic
# create STable
cursor: TaosRestCursor = conn.cursor()
cursor = conn.cursor()
cursor.execute("DROP DATABASE IF EXISTS power")
cursor.execute("CREATE DATABASE power")
cursor.execute("CREATE STABLE power.meters (ts TIMESTAMP, current FLOAT, voltage INT, phase FLOAT) TAGS (location BINARY(64), groupId INT)")
cursor.execute(
"CREATE STABLE power.meters (ts TIMESTAMP, current FLOAT, voltage INT, phase FLOAT) TAGS (location BINARY(64), groupId INT)")
# insert data
cursor.execute("""INSERT INTO power.d1001 USING power.meters TAGS(California.SanFrancisco, 2) VALUES ('2018-10-03 14:38:05.000', 10.30000, 219, 0.31000) ('2018-10-03 14:38:15.000', 12.60000, 218, 0.33000) ('2018-10-03 14:38:16.800', 12.30000, 221, 0.31000)
power.d1002 USING power.meters TAGS(California.SanFrancisco, 3) VALUES ('2018-10-03 14:38:16.650', 10.30000, 218, 0.25000)
power.d1003 USING power.meters TAGS(California.LosAngeles, 2) VALUES ('2018-10-03 14:38:05.500', 11.80000, 221, 0.28000) ('2018-10-03 14:38:16.600', 13.40000, 223, 0.29000)
power.d1004 USING power.meters TAGS(California.LosAngeles, 3) VALUES ('2018-10-03 14:38:05.000', 10.80000, 223, 0.29000) ('2018-10-03 14:38:06.500', 11.50000, 221, 0.35000)""")
cursor.execute("""INSERT INTO power.d1001 USING power.meters TAGS('California.SanFrancisco', 2) VALUES ('2018-10-03 14:38:05.000', 10.30000, 219, 0.31000) ('2018-10-03 14:38:15.000', 12.60000, 218, 0.33000) ('2018-10-03 14:38:16.800', 12.30000, 221, 0.31000)
power.d1002 USING power.meters TAGS('California.SanFrancisco', 3) VALUES ('2018-10-03 14:38:16.650', 10.30000, 218, 0.25000)
power.d1003 USING power.meters TAGS('California.LosAngeles', 2) VALUES ('2018-10-03 14:38:05.500', 11.80000, 221, 0.28000) ('2018-10-03 14:38:16.600', 13.40000, 223, 0.29000)
power.d1004 USING power.meters TAGS('California.LosAngeles', 3) VALUES ('2018-10-03 14:38:05.000', 10.80000, 223, 0.29000) ('2018-10-03 14:38:06.500', 11.50000, 221, 0.35000)""")
print("inserted row count:", cursor.rowcount)
# query data
......@@ -28,7 +29,7 @@ print("queried row count:", cursor.rowcount)
# get column names from cursor
column_names = [meta[0] for meta in cursor.description]
# get rows
data: list[tuple] = cursor.fetchall()
data = cursor.fetchall()
print(column_names)
for row in data:
print(row)
......
......@@ -8,7 +8,7 @@ conn.execute("CREATE DATABASE test")
# change database. same as execute "USE db"
conn.select_db("test")
conn.execute("CREATE STABLE weather(ts TIMESTAMP, temperature FLOAT) TAGS (location INT)")
affected_row: int = conn.execute("INSERT INTO t1 USING weather TAGS(1) VALUES (now, 23.5) (now+1m, 23.5) (now+2m 24.4)")
affected_row = conn.execute("INSERT INTO t1 USING weather TAGS(1) VALUES (now, 23.5) (now+1m, 23.5) (now+2m, 24.4)")
print("affected_row", affected_row)
# output:
# affected_row 3
......@@ -16,10 +16,10 @@ print("affected_row", affected_row)
# ANCHOR: query
# Execute a sql and get its result set. It's useful for SELECT statement
result: taos.TaosResult = conn.query("SELECT * from weather")
result = conn.query("SELECT * from weather")
# Get fields from result
fields: taos.field.TaosFields = result.fields
fields = result.fields
for field in fields:
print(field) # {name: ts, type: 9, bytes: 8}
......@@ -42,4 +42,4 @@ print(data)
# ANCHOR_END: query
conn.close()
conn.close()
\ No newline at end of file
# install dependencies:
# recommend python >= 3.8
# pip3 install faster-fifo
#
import logging
import math
import multiprocessing
import sys
import time
import os
from multiprocessing import Process
from faster_fifo import Queue
from multiprocessing import Process, Queue
from mockdatasource import MockDataSource
from queue import Empty
from typing import List
......@@ -22,8 +21,7 @@ TABLE_COUNT = 1000
QUEUE_SIZE = 1000000
MAX_BATCH_SIZE = 3000
read_processes = []
write_processes = []
_DONE_MESSAGE = '__DONE__'
def get_connection():
......@@ -44,41 +42,64 @@ def get_connection():
# ANCHOR: read
def run_read_task(task_id: int, task_queues: List[Queue]):
def run_read_task(task_id: int, task_queues: List[Queue], infinity):
table_count_per_task = TABLE_COUNT // READ_TASK_COUNT
data_source = MockDataSource(f"tb{task_id}", table_count_per_task)
data_source = MockDataSource(f"tb{task_id}", table_count_per_task, infinity)
try:
for batch in data_source:
if isinstance(batch, tuple):
batch = [batch]
for table_id, rows in batch:
# hash data to different queue
i = table_id % len(task_queues)
# block putting forever when the queue is full
task_queues[i].put_many(rows, block=True, timeout=-1)
for row in rows:
task_queues[i].put(row)
if not infinity:
for queue in task_queues:
queue.put(_DONE_MESSAGE)
except KeyboardInterrupt:
pass
finally:
logging.info('read task over')
# ANCHOR_END: read
# ANCHOR: write
def run_write_task(task_id: int, queue: Queue):
def run_write_task(task_id: int, queue: Queue, done_queue: Queue):
from sql_writer import SQLWriter
log = logging.getLogger(f"WriteTask-{task_id}")
writer = SQLWriter(get_connection)
lines = None
try:
while True:
try:
# get as many as possible
lines = queue.get_many(block=False, max_messages_to_get=MAX_BATCH_SIZE)
over = False
lines = []
for _ in range(MAX_BATCH_SIZE):
try:
line = queue.get_nowait()
if line == _DONE_MESSAGE:
over = True
break
if line:
lines.append(line)
except Empty:
time.sleep(0.1)
if len(lines) > 0:
writer.process_lines(lines)
except Empty:
time.sleep(0.01)
if over:
done_queue.put(_DONE_MESSAGE)
break
except KeyboardInterrupt:
pass
except BaseException as e:
log.debug(f"lines={lines}")
raise e
finally:
writer.close()
log.debug('write task over')
# ANCHOR_END: write
......@@ -103,47 +124,64 @@ def set_global_config():
# ANCHOR: monitor
def run_monitor_process():
def run_monitor_process(done_queue: Queue):
log = logging.getLogger("DataBaseMonitor")
conn = get_connection()
conn.execute("DROP DATABASE IF EXISTS test")
conn.execute("CREATE DATABASE test")
conn.execute("CREATE STABLE test.meters (ts TIMESTAMP, current FLOAT, voltage INT, phase FLOAT) "
"TAGS (location BINARY(64), groupId INT)")
conn = None
try:
conn = get_connection()
def get_count():
res = conn.query("SELECT count(*) FROM test.meters")
rows = res.fetch_all()
return rows[0][0] if rows else 0
def get_count():
res = conn.query("SELECT count(*) FROM test.meters")
rows = res.fetch_all()
return rows[0][0] if rows else 0
last_count = 0
while True:
time.sleep(10)
count = get_count()
log.info(f"count={count} speed={(count - last_count) / 10}")
last_count = count
last_count = 0
while True:
try:
done = done_queue.get_nowait()
if done == _DONE_MESSAGE:
break
except Empty:
pass
time.sleep(10)
count = get_count()
log.info(f"count={count} speed={(count - last_count) / 10}")
last_count = count
finally:
conn.close()
# ANCHOR_END: monitor
# ANCHOR: main
def main():
def main(infinity):
set_global_config()
logging.info(f"READ_TASK_COUNT={READ_TASK_COUNT}, WRITE_TASK_COUNT={WRITE_TASK_COUNT}, "
f"TABLE_COUNT={TABLE_COUNT}, QUEUE_SIZE={QUEUE_SIZE}, MAX_BATCH_SIZE={MAX_BATCH_SIZE}")
monitor_process = Process(target=run_monitor_process)
conn = get_connection()
conn.execute("DROP DATABASE IF EXISTS test")
conn.execute("CREATE DATABASE IF NOT EXISTS test")
conn.execute("CREATE STABLE IF NOT EXISTS test.meters (ts TIMESTAMP, current FLOAT, voltage INT, phase FLOAT) "
"TAGS (location BINARY(64), groupId INT)")
conn.close()
done_queue = Queue()
monitor_process = Process(target=run_monitor_process, args=(done_queue,))
monitor_process.start()
time.sleep(3) # waiting for database ready.
logging.debug(f"monitor task started with pid {monitor_process.pid}")
task_queues: List[Queue] = []
write_processes = []
read_processes = []
# create task queues
for i in range(WRITE_TASK_COUNT):
queue = Queue(max_size_bytes=QUEUE_SIZE)
queue = Queue()
task_queues.append(queue)
# create write processes
for i in range(WRITE_TASK_COUNT):
p = Process(target=run_write_task, args=(i, task_queues[i]))
p = Process(target=run_write_task, args=(i, task_queues[i], done_queue))
p.start()
logging.debug(f"WriteTask-{i} started with pid {p.pid}")
write_processes.append(p)
......@@ -151,13 +189,19 @@ def main():
# create read processes
for i in range(READ_TASK_COUNT):
queues = assign_queues(i, task_queues)
p = Process(target=run_read_task, args=(i, queues))
p = Process(target=run_read_task, args=(i, queues, infinity))
p.start()
logging.debug(f"ReadTask-{i} started with pid {p.pid}")
read_processes.append(p)
try:
monitor_process.join()
for p in read_processes:
p.join()
for p in write_processes:
p.join()
time.sleep(1)
return
except KeyboardInterrupt:
monitor_process.terminate()
[p.terminate() for p in read_processes]
......@@ -176,5 +220,6 @@ def assign_queues(read_task_id, task_queues):
if __name__ == '__main__':
main()
multiprocessing.set_start_method('spawn')
main(False)
# ANCHOR_END: main
......@@ -26,7 +26,8 @@ class Consumer(object):
'bath_consume': True,
'batch_size': 1000,
'async_model': True,
'workers': 10
'workers': 10,
'testing': False
}
LOCATIONS = ['California.SanFrancisco', 'California.LosAngles', 'California.SanDiego', 'California.SanJose',
......@@ -46,11 +47,12 @@ class Consumer(object):
def __init__(self, **configs):
self.config: dict = self.DEFAULT_CONFIGS
self.config.update(configs)
self.consumer = KafkaConsumer(
self.config.get('kafka_topic'), # topic
bootstrap_servers=self.config.get('kafka_brokers'),
group_id=self.config.get('kafka_group_id'),
)
if not self.config.get('testing'):
self.consumer = KafkaConsumer(
self.config.get('kafka_topic'), # topic
bootstrap_servers=self.config.get('kafka_brokers'),
group_id=self.config.get('kafka_group_id'),
)
self.taos = taos.connect(
host=self.config.get('taos_host'),
user=self.config.get('taos_user'),
......@@ -60,7 +62,7 @@ class Consumer(object):
)
if self.config.get('async_model'):
self.pool = ThreadPoolExecutor(max_workers=self.config.get('workers'))
self.tasks: list[Future] = []
self.tasks = []
# tags and table mapping # key: {location}_{groupId} value:
self.tag_table_mapping = {}
i = 0
......@@ -104,8 +106,8 @@ class Consumer(object):
for task in self.tasks:
while not task.done():
pass
if self.pool is not None:
self.pool.shutdown()
if self.pool is not None:
self.pool.shutdown()
# clean data
if self.config.get('clean_after_testing'):
......@@ -115,14 +117,14 @@ class Consumer(object):
if self.taos is not None:
self.taos.close()
def _run(self, f: Callable[[ConsumerRecord], bool]):
def _run(self, f):
for message in self.consumer:
if self.config.get('async_model'):
self.pool.submit(f(message))
else:
f(message)
def _run_batch(self, f: Callable[[list[list[ConsumerRecord]]], None]):
def _run_batch(self, f):
while True:
messages = self.consumer.poll(timeout_ms=500, max_records=self.config.get('batch_size'))
if messages:
......@@ -140,7 +142,7 @@ class Consumer(object):
logging.info('## insert sql %s', sql)
return self.taos.execute(sql=sql) == 1
def _to_taos_batch(self, messages: list[list[ConsumerRecord]]):
def _to_taos_batch(self, messages):
sql = self._build_sql_batch(messages=messages)
if len(sql) == 0: # decode error, skip
return
......@@ -162,7 +164,7 @@ class Consumer(object):
table_name = self._get_table_name(location=location, group_id=group_id)
return self.INSERT_PART_SQL.format(table_name, ts, current, voltage, phase)
def _build_sql_batch(self, messages: list[list[ConsumerRecord]]) -> str:
def _build_sql_batch(self, messages) -> str:
sql_list = []
for partition_messages in messages:
for message in partition_messages:
......@@ -186,7 +188,55 @@ def _get_location_and_group(key: str) -> (str, int):
return fields[0], fields[1]
def test_to_taos(consumer: Consumer):
msg = {
'location': 'California.SanFrancisco',
'groupId': 1,
'ts': '2022-12-06 15:13:38.643',
'current': 3.41,
'voltage': 105,
'phase': 0.02027,
}
record = ConsumerRecord(checksum=None, headers=None, offset=1, key=None, value=json.dumps(msg), partition=1,
topic='test', serialized_key_size=None, serialized_header_size=None,
serialized_value_size=None, timestamp=time.time(), timestamp_type=None)
assert consumer._to_taos(message=record)
def test_to_taos_batch(consumer: Consumer):
records = [
[
ConsumerRecord(checksum=None, headers=None, offset=1, key=None,
value=json.dumps({'location': 'California.SanFrancisco',
'groupId': 1,
'ts': '2022-12-06 15:13:38.643',
'current': 3.41,
'voltage': 105,
'phase': 0.02027, }),
partition=1, topic='test', serialized_key_size=None, serialized_header_size=None,
serialized_value_size=None, timestamp=time.time(), timestamp_type=None),
ConsumerRecord(checksum=None, headers=None, offset=1, key=None,
value=json.dumps({'location': 'California.LosAngles',
'groupId': 2,
'ts': '2022-12-06 15:13:39.643',
'current': 3.41,
'voltage': 102,
'phase': 0.02027, }),
partition=1, topic='test', serialized_key_size=None, serialized_header_size=None,
serialized_value_size=None, timestamp=time.time(), timestamp_type=None),
]
]
consumer._to_taos_batch(messages=records)
if __name__ == '__main__':
consumer = Consumer(async_model=True)
consumer = Consumer(async_model=True, testing=True)
# init env
consumer.init_env()
consumer.consume()
\ No newline at end of file
# consumer.consume()
# test build sql
# test build sql batch
test_to_taos(consumer)
test_to_taos_batch(consumer)
\ No newline at end of file
......@@ -10,13 +10,14 @@ class MockDataSource:
"9.4,118,0.141,California.SanFrancisco,4"
]
def __init__(self, tb_name_prefix, table_count):
def __init__(self, tb_name_prefix, table_count, infinity=True):
self.table_name_prefix = tb_name_prefix + "_"
self.table_count = table_count
self.max_rows = 10000000
self.current_ts = round(time.time() * 1000) - self.max_rows * 100
# [(tableId, tableName, values),]
self.data = self._init_data()
self.infinity = infinity
def _init_data(self):
lines = self.samples * (self.table_count // 5 + 1)
......@@ -28,14 +29,19 @@ class MockDataSource:
def __iter__(self):
self.row = 0
return self
if not self.infinity:
return iter(self._iter_data())
else:
return self
def __next__(self):
"""
next 1000 rows for each table.
return: {tableId:[row,...]}
"""
# generate 1000 timestamps
return self._iter_data()
def _iter_data(self):
ts = []
for _ in range(1000):
self.current_ts += 100
......@@ -47,3 +53,10 @@ class MockDataSource:
rows = [table_name + ',' + t + ',' + values for t in ts]
result.append((table_id, rows))
return result
if __name__ == '__main__':
datasource = MockDataSource('t', 10, False)
for data in datasource:
print(data)
\ No newline at end of file
......@@ -10,6 +10,7 @@ class SQLWriter:
self._tb_tags = {}
self._conn = get_connection_func()
self._max_sql_length = self.get_max_sql_length()
self._conn.execute("create database if not exists test")
self._conn.execute("USE test")
def get_max_sql_length(self):
......@@ -20,7 +21,7 @@ class SQLWriter:
return int(r[1])
return 1024 * 1024
def process_lines(self, lines: str):
def process_lines(self, lines: [str]):
"""
:param lines: [[tbName,ts,current,voltage,phase,location,groupId]]
"""
......@@ -60,6 +61,7 @@ class SQLWriter:
buf.append(q)
sql_len += len(q)
sql += " ".join(buf)
self.create_tables()
self.execute_sql(sql)
self._tb_values.clear()
......@@ -88,3 +90,23 @@ class SQLWriter:
except BaseException as e:
self.log.error("Execute SQL: %s", sql)
raise e
def close(self):
if self._conn:
self._conn.close()
if __name__ == '__main__':
def get_connection_func():
conn = taos.connect()
return conn
writer = SQLWriter(get_connection_func=get_connection_func)
writer.execute_sql(
"create stable if not exists meters (ts timestamp, current float, voltage int, phase float) "
"tags (location binary(64), groupId int)")
writer.execute_sql(
"INSERT INTO d21001 USING meters TAGS ('California.SanFrancisco', 2) "
"VALUES ('2021-07-13 14:06:32.272', 10.2, 219, 0.32)")
\ No newline at end of file
from taos.tmq import Consumer
import taos
from taos.tmq import *
conn = taos.connect()
print("init")
conn.execute("drop topic if exists topic_ctb_column")
conn.execute("drop database if exists py_tmq")
conn.execute("create database if not exists py_tmq vgroups 2")
conn.select_db("py_tmq")
conn.execute(
"create stable if not exists stb1 (ts timestamp, c1 int, c2 float, c3 binary(10)) tags(t1 int)"
)
conn.execute("create table if not exists tb1 using stb1 tags(1)")
conn.execute("create table if not exists tb2 using stb1 tags(2)")
conn.execute("create table if not exists tb3 using stb1 tags(3)")
print("create topic")
conn.execute(
"create topic if not exists topic_ctb_column as select ts, c1, c2, c3 from stb1"
)
print("build consumer")
conf = TaosTmqConf()
conf.set("group.id", "tg2")
conf.set("td.connect.user", "root")
conf.set("td.connect.pass", "taosdata")
conf.set("enable.auto.commit", "true")
def tmq_commit_cb_print(tmq, resp, offset, param=None):
print(f"commit: {resp}, tmq: {tmq}, offset: {offset}, param: {param}")
conf.set_auto_commit_cb(tmq_commit_cb_print, None)
tmq = conf.new_consumer()
print("build topic list")
topic_list = TaosTmqList()
topic_list.append("topic_ctb_column")
print("basic consume loop")
tmq.subscribe(topic_list)
sub_list = tmq.subscription()
print("subscribed topics: ", sub_list)
while 1:
res = tmq.poll(1000)
if res:
topic = res.get_topic_name()
vg = res.get_vgroup_id()
db = res.get_db_name()
print(f"topic: {topic}\nvgroup id: {vg}\ndb: {db}")
for row in res:
print(row)
def init_tmq_env(db, topic):
conn = taos.connect()
conn.execute("drop topic if exists {}".format(topic))
conn.execute("drop database if exists {}".format(db))
conn.execute("create database if not exists {}".format(db))
conn.select_db(db)
conn.execute(
"create stable if not exists stb1 (ts timestamp, c1 int, c2 float, c3 varchar(16)) tags(t1 int, t3 varchar(16))")
conn.execute("create table if not exists tb1 using stb1 tags(1, 't1')")
conn.execute("create table if not exists tb2 using stb1 tags(2, 't2')")
conn.execute("create table if not exists tb3 using stb1 tags(3, 't3')")
conn.execute("create topic if not exists {} as select ts, c1, c2, c3 from stb1".format(topic))
conn.execute("insert into tb1 values (now, 1, 1.0, 'tmq test')")
conn.execute("insert into tb2 values (now, 2, 2.0, 'tmq test')")
conn.execute("insert into tb3 values (now, 3, 3.0, 'tmq test')")
def cleanup(db, topic):
conn = taos.connect()
conn.execute("drop topic if exists {}".format(topic))
conn.execute("drop database if exists {}".format(db))
if __name__ == '__main__':
init_tmq_env("tmq_test", "tmq_test_topic") # init env
consumer = Consumer(
{
"group.id": "tg2",
"td.connect.user": "root",
"td.connect.pass": "taosdata",
"enable.auto.commit": "true",
}
)
consumer.subscribe(["tmq_test_topic"])
try:
while True:
res = consumer.poll(1)
if not res:
break
err = res.error()
if err is not None:
raise err
val = res.value()
for block in val:
print(block.fetchall())
finally:
consumer.unsubscribe()
consumer.close()
cleanup("tmq_test", "tmq_test_topic")
\ No newline at end of file
......@@ -201,6 +201,7 @@ int32_t walFetchHead(SWalReader *pRead, int64_t ver, SWalCkHead *pHead);
int32_t walFetchBody(SWalReader *pRead, SWalCkHead **ppHead);
int32_t walSkipFetchBody(SWalReader *pRead, const SWalCkHead *pHead);
SWalRef *walRefFirstVer(SWal *, SWalRef *);
SWalRef *walRefCommittedVer(SWal *);
SWalRef *walOpenRef(SWal *);
......
......@@ -210,8 +210,8 @@ function install_bin() {
[ -x ${install_main_dir}/bin/${serverName} ] && ${csudo}ln -s ${install_main_dir}/bin/${serverName} ${bin_link_dir}/${serverName} || :
[ -x ${install_main_dir}/bin/${udfdName} ] && ${csudo}ln -s ${install_main_dir}/bin/${udfdName} ${bin_link_dir}/${udfdName} || :
[ -x ${install_main_dir}/bin/${adapterName} ] && ${csudo}ln -s ${install_main_dir}/bin/${adapterName} ${bin_link_dir}/${adapterName} || :
[ -x ${install_main_dir}/bin/${benchmarkName} ] && ${csudo}ln -s ${install_main_dir}/bin/${benchmarkName} ${bin_link_dir}/${demoName} || :
[ -x ${install_main_dir}/bin/${benchmarkName} ] && ${csudo}ln -s ${install_main_dir}/bin/${benchmarkName} ${bin_link_dir}/${benchmarkName} || :
[ -x ${install_main_dir}/bin/${benchmarkName} ] && ${csudo}ln -sf ${install_main_dir}/bin/${benchmarkName} ${bin_link_dir}/${demoName} || :
[ -x ${install_main_dir}/bin/${benchmarkName} ] && ${csudo}ln -sf ${install_main_dir}/bin/${benchmarkName} ${bin_link_dir}/${benchmarkName} || :
[ -x ${install_main_dir}/bin/${dumpName} ] && ${csudo}ln -s ${install_main_dir}/bin/${dumpName} ${bin_link_dir}/${dumpName} || :
[ -x ${install_main_dir}/bin/${xname} ] && ${csudo}ln -s ${install_main_dir}/bin/${xname} ${bin_link_dir}/${xname} || :
[ -x ${install_main_dir}/bin/TDinsight.sh ] && ${csudo}ln -s ${install_main_dir}/bin/TDinsight.sh ${bin_link_dir}/TDinsight.sh || :
......@@ -746,7 +746,7 @@ function is_version_compatible() {
deb_erase() {
confirm=""
while [ "" == "${confirm}" ]; do
echo -e -n "${RED}Exist tdengine deb detected, do you want to remove it? [yes|no] ${NC}:"
echo -e -n "${RED}Existing TDengine deb is detected, do you want to remove it? [yes|no] ${NC}:"
read confirm
if [ "yes" == "$confirm" ]; then
${csudo}dpkg --remove tdengine ||:
......@@ -760,7 +760,7 @@ deb_erase() {
rpm_erase() {
confirm=""
while [ "" == "${confirm}" ]; do
echo -e -n "${RED}Exist tdengine rpm detected, do you want to remove it? [yes|no] ${NC}:"
echo -e -n "${RED}Existing TDengine rpm is detected, do you want to remove it? [yes|no] ${NC}:"
read confirm
if [ "yes" == "$confirm" ]; then
${csudo}rpm -e tdengine ||:
......@@ -787,7 +787,7 @@ function updateProduct() {
if echo $osinfo | grep -qwi "centos"; then
rpm -q tdengine 2>&1 > /dev/null && rpm_erase tdengine ||:
elif echo $osinfo | grep -qwi "ubuntu"; then
dpkg -l tdengine 2>&1 > /dev/null && deb_erase tdengine ||:
dpkg -l tdengine 2>&1 | grep ii > /dev/null && deb_erase tdengine ||:
fi
tar -zxf ${tarName}
......
......@@ -79,8 +79,6 @@ int32_t vmOpenVnode(SVnodeMgmt *pMgmt, SWrapperCfg *pCfg, SVnode *pImpl) {
void vmCloseVnode(SVnodeMgmt *pMgmt, SVnodeObj *pVnode) {
char path[TSDB_FILENAME_LEN] = {0};
vnodeProposeCommitOnNeed(pVnode->pImpl);
taosThreadRwlockWrlock(&pMgmt->lock);
taosHashRemove(pMgmt->hash, &pVnode->vgId, sizeof(int32_t));
taosThreadRwlockUnlock(&pMgmt->lock);
......
......@@ -202,6 +202,7 @@ int32_t tsdbCmprColData(SColData *pColData, int8_t cmprAlg, SBlockCol *pBlockCol
uint8_t **ppBuf);
int32_t tsdbDecmprColData(uint8_t *pIn, SBlockCol *pBlockCol, int8_t cmprAlg, int32_t nVal, SColData *pColData,
uint8_t **ppBuf);
int32_t tRowInfoCmprFn(const void *p1, const void *p2);
// tsdbMemTable ==============================================================================================
// SMemTable
int32_t tsdbMemTableCreate(STsdb *pTsdb, SMemTable **ppMemTable);
......
......@@ -247,7 +247,7 @@ int32_t tsdbSnapReaderClose(STsdbSnapReader** ppReader);
int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData);
// STsdbSnapWriter ========================================
int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWriter** ppWriter);
int32_t tsdbSnapWrite(STsdbSnapWriter* pWriter, uint8_t* pData, uint32_t nData);
int32_t tsdbSnapWrite(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr);
int32_t tsdbSnapWriterPrepareClose(STsdbSnapWriter* pWriter);
int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback);
// STqSnapshotReader ==
......
......@@ -423,10 +423,10 @@ int32_t rsmaSnapWrite(SRSmaSnapWriter* pWriter, uint8_t* pData, uint32_t nData)
// rsma1/rsma2
if (pHdr->type == SNAP_DATA_RSMA1) {
pHdr->type = SNAP_DATA_TSDB;
code = tsdbSnapWrite(pWriter->pDataWriter[0], pData, nData);
code = tsdbSnapWrite(pWriter->pDataWriter[0], pHdr);
} else if (pHdr->type == SNAP_DATA_RSMA2) {
pHdr->type = SNAP_DATA_TSDB;
code = tsdbSnapWrite(pWriter->pDataWriter[1], pData, nData);
code = tsdbSnapWrite(pWriter->pDataWriter[1], pHdr);
} else if (pHdr->type == SNAP_DATA_QTASK) {
code = rsmaSnapWriteQTaskInfo(pWriter, pData, nData);
} else {
......
......@@ -521,7 +521,12 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) {
tqOffsetResetToData(&fetchOffsetNew, 0, 0);
}
} else {
tqOffsetResetToLog(&fetchOffsetNew, walGetFirstVer(pTq->pVnode->pWal));
pHandle->pRef = walRefFirstVer(pTq->pVnode->pWal, pHandle->pRef);
if (pHandle->pRef == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
}
tqOffsetResetToLog(&fetchOffsetNew, pHandle->pRef->refVer - 1);
}
} else if (reqOffset.type == TMQ_OFFSET__RESET_LATEST) {
if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
......@@ -719,6 +724,8 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) {
int32_t tqProcessDeleteSubReq(STQ* pTq, int64_t version, char* msg, int32_t msgLen) {
SMqVDeleteReq* pReq = (SMqVDeleteReq*)msg;
tqDebug("vgId:%d, delete sub: %s", pTq->pVnode->config.vgId, pReq->subKey);
taosWLockLatch(&pTq->pushLock);
int32_t code = taosHashRemove(pTq->pPushMgr, pReq->subKey, strlen(pReq->subKey));
if (code != 0) {
......
......@@ -15,274 +15,628 @@
#include "tsdb.h"
// STsdbSnapReader ========================================
typedef enum { SNAP_DATA_FILE_ITER = 0, SNAP_STT_FILE_ITER } EFIterT;
extern int32_t tsdbReadDataBlockEx(SDataFReader* pReader, SDataBlk* pDataBlk, SBlockData* pBlockData);
extern int32_t tsdbUpdateTableSchema(SMeta* pMeta, int64_t suid, int64_t uid, SSkmInfo* pSkmInfo);
extern int32_t tsdbWriteDataBlock(SDataFWriter* pWriter, SBlockData* pBlockData, SMapData* mDataBlk, int8_t cmprAlg);
extern int32_t tsdbWriteSttBlock(SDataFWriter* pWriter, SBlockData* pBlockData, SArray* aSttBlk, int8_t cmprAlg);
// STsdbDataIter2 ========================================
#define TSDB_MEM_TABLE_DATA_ITER 0
#define TSDB_DATA_FILE_DATA_ITER 1
#define TSDB_STT_FILE_DATA_ITER 2
#define TSDB_TOMB_FILE_DATA_ITER 3
typedef struct STsdbDataIter2 STsdbDataIter2;
typedef struct STsdbFilterInfo STsdbFilterInfo;
typedef struct {
SRBTreeNode n;
SRowInfo rInfo;
EFIterT type;
int64_t suid;
int64_t uid;
SDelData delData;
} SDelInfo;
struct STsdbDataIter2 {
STsdbDataIter2* next;
SRBTreeNode rbtn;
int32_t type;
SRowInfo rowInfo;
SDelInfo delInfo;
union {
// TSDB_MEM_TABLE_DATA_ITER
struct {
SArray* aBlockIdx;
int32_t iBlockIdx;
SBlockIdx* pBlockIdx;
SMapData mBlock;
int32_t iBlock;
}; // .data file
SMemTable* pMemTable;
} mIter;
// TSDB_DATA_FILE_DATA_ITER
struct {
int32_t iStt;
SArray* aSttBlk;
int32_t iSttBlk;
}; // .stt file
SDataFReader* pReader;
SArray* aBlockIdx; // SArray<SBlockIdx>
SMapData mDataBlk;
SBlockData bData;
int32_t iBlockIdx;
int32_t iDataBlk;
int32_t iRow;
} dIter;
// TSDB_STT_FILE_DATA_ITER
struct {
SDataFReader* pReader;
int32_t iStt;
SArray* aSttBlk;
SBlockData bData;
int32_t iSttBlk;
int32_t iRow;
} sIter;
// TSDB_TOMB_FILE_DATA_ITER
struct {
SDelFReader* pReader;
SArray* aDelIdx;
SArray* aDelData;
int32_t iDelIdx;
int32_t iDelData;
} tIter;
};
SBlockData bData;
int32_t iRow;
} SFDataIter;
};
struct STsdbSnapReader {
STsdb* pTsdb;
#define TSDB_FILTER_FLAG_BY_VERSION 0x1
struct STsdbFilterInfo {
int32_t flag;
int64_t sver;
int64_t ever;
STsdbFS fs;
int8_t type;
// for data file
int8_t dataDone;
int32_t fid;
SDataFReader* pDataFReader;
SFDataIter* pIter;
SRBTree rbt;
SFDataIter aFDataIter[TSDB_MAX_STT_TRIGGER + 1];
SBlockData bData;
SSkmInfo skmTable;
// for del file
int8_t delDone;
SDelFReader* pDelFReader;
SArray* aDelIdx; // SArray<SDelIdx>
int32_t iDelIdx;
SArray* aDelData; // SArray<SDelData>
uint8_t* aBuf[5];
};
extern int32_t tRowInfoCmprFn(const void* p1, const void* p2);
extern int32_t tsdbReadDataBlockEx(SDataFReader* pReader, SDataBlk* pDataBlk, SBlockData* pBlockData);
extern int32_t tsdbUpdateTableSchema(SMeta* pMeta, int64_t suid, int64_t uid, SSkmInfo* pSkmInfo);
#define TSDB_RBTN_TO_DATA_ITER(pNode) ((STsdbDataIter2*)(((char*)pNode) - offsetof(STsdbDataIter2, rbtn)))
/* open */
static int32_t tsdbOpenDataFileDataIter(SDataFReader* pReader, STsdbDataIter2** ppIter) {
int32_t code = 0;
int32_t lino = 0;
// create handle
STsdbDataIter2* pIter = (STsdbDataIter2*)taosMemoryCalloc(1, sizeof(*pIter));
if (pIter == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
pIter->type = TSDB_DATA_FILE_DATA_ITER;
pIter->dIter.pReader = pReader;
if ((pIter->dIter.aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx))) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tBlockDataCreate(&pIter->dIter.bData);
TSDB_CHECK_CODE(code, lino, _exit);
pIter->dIter.iBlockIdx = 0;
pIter->dIter.iDataBlk = 0;
pIter->dIter.iRow = 0;
// read data
code = tsdbReadBlockIdx(pReader, pIter->dIter.aBlockIdx);
TSDB_CHECK_CODE(code, lino, _exit);
static int32_t tFDataIterCmprFn(const SRBTreeNode* pNode1, const SRBTreeNode* pNode2) {
SFDataIter* pIter1 = (SFDataIter*)(((uint8_t*)pNode1) - offsetof(SFDataIter, n));
SFDataIter* pIter2 = (SFDataIter*)(((uint8_t*)pNode2) - offsetof(SFDataIter, n));
if (taosArrayGetSize(pIter->dIter.aBlockIdx) == 0) goto _clear;
return tRowInfoCmprFn(&pIter1->rInfo, &pIter2->rInfo);
_exit:
if (code) {
if (pIter) {
_clear:
tBlockDataDestroy(&pIter->dIter.bData, 1);
taosArrayDestroy(pIter->dIter.aBlockIdx);
taosMemoryFree(pIter);
pIter = NULL;
}
}
*ppIter = pIter;
return code;
}
static int32_t tsdbSnapReadOpenFile(STsdbSnapReader* pReader) {
static int32_t tsdbOpenSttFileDataIter(SDataFReader* pReader, int32_t iStt, STsdbDataIter2** ppIter) {
int32_t code = 0;
int32_t lino = 0;
SDFileSet dFileSet = {.fid = pReader->fid};
SDFileSet* pSet = taosArraySearch(pReader->fs.aDFileSet, &dFileSet, tDFileSetCmprFn, TD_GT);
if (pSet == NULL) return code;
// create handle
STsdbDataIter2* pIter = (STsdbDataIter2*)taosMemoryCalloc(1, sizeof(*pIter));
if (pIter == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
pReader->fid = pSet->fid;
code = tsdbDataFReaderOpen(&pReader->pDataFReader, pReader->pTsdb, pSet);
TSDB_CHECK_CODE(code, lino, _exit);
pIter->type = TSDB_STT_FILE_DATA_ITER;
pIter->sIter.pReader = pReader;
pIter->sIter.iStt = iStt;
pIter->sIter.aSttBlk = taosArrayInit(0, sizeof(SSttBlk));
if (pIter->sIter.aSttBlk == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
pReader->pIter = NULL;
tRBTreeCreate(&pReader->rbt, tFDataIterCmprFn);
code = tBlockDataCreate(&pIter->sIter.bData);
TSDB_CHECK_CODE(code, lino, _exit);
// .data file
SFDataIter* pIter = &pReader->aFDataIter[0];
pIter->type = SNAP_DATA_FILE_ITER;
pIter->sIter.iSttBlk = 0;
pIter->sIter.iRow = 0;
code = tsdbReadBlockIdx(pReader->pDataFReader, pIter->aBlockIdx);
// read data
code = tsdbReadSttBlk(pReader, iStt, pIter->sIter.aSttBlk);
TSDB_CHECK_CODE(code, lino, _exit);
for (pIter->iBlockIdx = 0; pIter->iBlockIdx < taosArrayGetSize(pIter->aBlockIdx); pIter->iBlockIdx++) {
pIter->pBlockIdx = (SBlockIdx*)taosArrayGet(pIter->aBlockIdx, pIter->iBlockIdx);
if (taosArrayGetSize(pIter->sIter.aSttBlk) == 0) goto _clear;
code = tsdbReadDataBlk(pReader->pDataFReader, pIter->pBlockIdx, &pIter->mBlock);
_exit:
if (code) {
if (pIter) {
_clear:
taosArrayDestroy(pIter->sIter.aSttBlk);
tBlockDataDestroy(&pIter->sIter.bData, 1);
taosMemoryFree(pIter);
pIter = NULL;
}
}
*ppIter = pIter;
return code;
}
static int32_t tsdbOpenTombFileDataIter(SDelFReader* pReader, STsdbDataIter2** ppIter) {
int32_t code = 0;
int32_t lino = 0;
STsdbDataIter2* pIter = (STsdbDataIter2*)taosMemoryCalloc(1, sizeof(*pIter));
if (pIter == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
pIter->type = TSDB_TOMB_FILE_DATA_ITER;
pIter->tIter.pReader = pReader;
if ((pIter->tIter.aDelIdx = taosArrayInit(0, sizeof(SDelIdx))) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
if ((pIter->tIter.aDelData = taosArrayInit(0, sizeof(SDelData))) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
for (pIter->iBlock = 0; pIter->iBlock < pIter->mBlock.nItem; pIter->iBlock++) {
SDataBlk dataBlk;
tMapDataGetItemByIdx(&pIter->mBlock, pIter->iBlock, &dataBlk, tGetDataBlk);
code = tsdbReadDelIdx(pReader, pIter->tIter.aDelIdx);
TSDB_CHECK_CODE(code, lino, _exit);
if (dataBlk.minVer > pReader->ever || dataBlk.maxVer < pReader->sver) continue;
if (taosArrayGetSize(pIter->tIter.aDelIdx) == 0) goto _clear;
code = tsdbReadDataBlockEx(pReader->pDataFReader, &dataBlk, &pIter->bData);
TSDB_CHECK_CODE(code, lino, _exit);
pIter->tIter.iDelIdx = 0;
pIter->tIter.iDelData = 0;
ASSERT(pIter->pBlockIdx->suid == pIter->bData.suid);
ASSERT(pIter->pBlockIdx->uid == pIter->bData.uid);
_exit:
if (code) {
if (pIter) {
_clear:
taosArrayDestroy(pIter->tIter.aDelIdx);
taosArrayDestroy(pIter->tIter.aDelData);
taosMemoryFree(pIter);
pIter = NULL;
}
}
*ppIter = pIter;
return code;
}
/* close */
static void tsdbCloseDataFileDataIter(STsdbDataIter2* pIter) {
tBlockDataDestroy(&pIter->dIter.bData, 1);
tMapDataClear(&pIter->dIter.mDataBlk);
taosArrayDestroy(pIter->dIter.aBlockIdx);
taosMemoryFree(pIter);
}
static void tsdbCloseSttFileDataIter(STsdbDataIter2* pIter) {
tBlockDataDestroy(&pIter->sIter.bData, 1);
taosArrayDestroy(pIter->sIter.aSttBlk);
taosMemoryFree(pIter);
}
for (pIter->iRow = 0; pIter->iRow < pIter->bData.nRow; pIter->iRow++) {
int64_t rowVer = pIter->bData.aVersion[pIter->iRow];
static void tsdbCloseTombFileDataIter(STsdbDataIter2* pIter) {
taosArrayDestroy(pIter->tIter.aDelData);
taosArrayDestroy(pIter->tIter.aDelIdx);
taosMemoryFree(pIter);
}
if (rowVer >= pReader->sver && rowVer <= pReader->ever) {
pIter->rInfo.suid = pIter->pBlockIdx->suid;
pIter->rInfo.uid = pIter->pBlockIdx->uid;
pIter->rInfo.row = tsdbRowFromBlockData(&pIter->bData, pIter->iRow);
goto _add_iter_and_break;
static void tsdbCloseDataIter2(STsdbDataIter2* pIter) {
if (pIter->type == TSDB_MEM_TABLE_DATA_ITER) {
ASSERT(0);
} else if (pIter->type == TSDB_DATA_FILE_DATA_ITER) {
tsdbCloseDataFileDataIter(pIter);
} else if (pIter->type == TSDB_STT_FILE_DATA_ITER) {
tsdbCloseSttFileDataIter(pIter);
} else if (pIter->type == TSDB_TOMB_FILE_DATA_ITER) {
tsdbCloseTombFileDataIter(pIter);
} else {
ASSERT(0);
}
}
/* cmpr */
static int32_t tsdbDataIterCmprFn(const SRBTreeNode* pNode1, const SRBTreeNode* pNode2) {
STsdbDataIter2* pIter1 = TSDB_RBTN_TO_DATA_ITER(pNode1);
STsdbDataIter2* pIter2 = TSDB_RBTN_TO_DATA_ITER(pNode2);
return tRowInfoCmprFn(&pIter1->rowInfo, &pIter2->rowInfo);
}
/* seek */
/* iter next */
static int32_t tsdbDataFileDataIterNext(STsdbDataIter2* pIter, STsdbFilterInfo* pFilterInfo) {
int32_t code = 0;
int32_t lino = 0;
for (;;) {
while (pIter->dIter.iRow < pIter->dIter.bData.nRow) {
if (pFilterInfo) {
if (pFilterInfo->flag & TSDB_FILTER_FLAG_BY_VERSION) {
if (pIter->dIter.bData.aVersion[pIter->dIter.iRow] < pFilterInfo->sver ||
pIter->dIter.bData.aVersion[pIter->dIter.iRow] > pFilterInfo->ever) {
pIter->dIter.iRow++;
continue;
}
}
}
pIter->rowInfo.suid = pIter->dIter.bData.suid;
pIter->rowInfo.uid = pIter->dIter.bData.uid;
pIter->rowInfo.row = tsdbRowFromBlockData(&pIter->dIter.bData, pIter->dIter.iRow);
pIter->dIter.iRow++;
goto _exit;
}
continue;
for (;;) {
while (pIter->dIter.iDataBlk < pIter->dIter.mDataBlk.nItem) {
SDataBlk dataBlk;
tMapDataGetItemByIdx(&pIter->dIter.mDataBlk, pIter->dIter.iDataBlk, &dataBlk, tGetDataBlk);
// filter
if (pFilterInfo) {
if (pFilterInfo->flag & TSDB_FILTER_FLAG_BY_VERSION) {
if (pFilterInfo->sver > dataBlk.maxVer || pFilterInfo->ever < dataBlk.minVer) {
pIter->dIter.iDataBlk++;
continue;
}
}
}
_add_iter_and_break:
tRBTreePut(&pReader->rbt, (SRBTreeNode*)pIter);
break;
}
code = tsdbReadDataBlockEx(pIter->dIter.pReader, &dataBlk, &pIter->dIter.bData);
TSDB_CHECK_CODE(code, lino, _exit);
// .stt file
pIter = &pReader->aFDataIter[1];
for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) {
pIter->type = SNAP_STT_FILE_ITER;
pIter->iStt = iStt;
pIter->dIter.iDataBlk++;
pIter->dIter.iRow = 0;
code = tsdbReadSttBlk(pReader->pDataFReader, iStt, pIter->aSttBlk);
TSDB_CHECK_CODE(code, lino, _exit);
break;
}
for (pIter->iSttBlk = 0; pIter->iSttBlk < taosArrayGetSize(pIter->aSttBlk); pIter->iSttBlk++) {
SSttBlk* pSttBlk = (SSttBlk*)taosArrayGet(pIter->aSttBlk, pIter->iSttBlk);
if (pIter->dIter.iRow < pIter->dIter.bData.nRow) break;
if (pSttBlk->minVer > pReader->ever) continue;
if (pSttBlk->maxVer < pReader->sver) continue;
for (;;) {
if (pIter->dIter.iBlockIdx < taosArrayGetSize(pIter->dIter.aBlockIdx)) {
SBlockIdx* pBlockIdx = taosArrayGet(pIter->dIter.aBlockIdx, pIter->dIter.iBlockIdx);
code = tsdbReadSttBlockEx(pReader->pDataFReader, iStt, pSttBlk, &pIter->bData);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbReadDataBlk(pIter->dIter.pReader, pBlockIdx, &pIter->dIter.mDataBlk);
TSDB_CHECK_CODE(code, lino, _exit);
for (pIter->iRow = 0; pIter->iRow < pIter->bData.nRow; pIter->iRow++) {
int64_t rowVer = pIter->bData.aVersion[pIter->iRow];
pIter->dIter.iBlockIdx++;
pIter->dIter.iDataBlk = 0;
if (rowVer >= pReader->sver && rowVer <= pReader->ever) {
pIter->rInfo.suid = pIter->bData.suid;
pIter->rInfo.uid = pIter->bData.uid ? pIter->bData.uid : pIter->bData.aUid[pIter->iRow];
pIter->rInfo.row = tsdbRowFromBlockData(&pIter->bData, pIter->iRow);
goto _add_iter;
break;
} else {
pIter->rowInfo = (SRowInfo){0};
goto _exit;
}
}
}
continue;
_add_iter:
tRBTreePut(&pReader->rbt, (SRBTreeNode*)pIter);
pIter++;
}
_exit:
if (code) {
tsdbError("vgId:%d, %s failed since %s", TD_VID(pReader->pTsdb->pVnode), __func__, tstrerror(code));
} else {
tsdbInfo("vgId:%d, %s done, path:%s, fid:%d", TD_VID(pReader->pTsdb->pVnode), __func__, pReader->pTsdb->path,
pReader->fid);
tsdbError("%s failed at line %d since %s", __func__, lino, tstrerror(code));
}
return code;
}
static int32_t tsdbSnapNextRow(STsdbSnapReader* pReader) {
static int32_t tsdbSttFileDataIterNext(STsdbDataIter2* pIter, STsdbFilterInfo* pFilterInfo) {
int32_t code = 0;
int32_t lino = 0;
if (pReader->pIter) {
SFDataIter* pIter = NULL;
while (true) {
_find_row:
pIter = pReader->pIter;
for (pIter->iRow++; pIter->iRow < pIter->bData.nRow; pIter->iRow++) {
int64_t rowVer = pIter->bData.aVersion[pIter->iRow];
if (rowVer >= pReader->sver && rowVer <= pReader->ever) {
pIter->rInfo.suid = pIter->bData.suid;
pIter->rInfo.uid = pIter->bData.uid ? pIter->bData.uid : pIter->bData.aUid[pIter->iRow];
pIter->rInfo.row = tsdbRowFromBlockData(&pIter->bData, pIter->iRow);
goto _out;
for (;;) {
while (pIter->sIter.iRow < pIter->sIter.bData.nRow) {
if (pFilterInfo) {
if (pFilterInfo->flag & TSDB_FILTER_FLAG_BY_VERSION) {
if (pFilterInfo->sver > pIter->sIter.bData.aVersion[pIter->sIter.iRow] ||
pFilterInfo->ever < pIter->sIter.bData.aVersion[pIter->sIter.iRow]) {
pIter->sIter.iRow++;
continue;
}
}
}
if (pIter->type == SNAP_DATA_FILE_ITER) {
while (true) {
for (pIter->iBlock++; pIter->iBlock < pIter->mBlock.nItem; pIter->iBlock++) {
SDataBlk dataBlk;
tMapDataGetItemByIdx(&pIter->mBlock, pIter->iBlock, &dataBlk, tGetDataBlk);
if (dataBlk.minVer > pReader->ever || dataBlk.maxVer < pReader->sver) continue;
pIter->rowInfo.suid = pIter->sIter.bData.suid;
pIter->rowInfo.uid = pIter->sIter.bData.uid ? pIter->sIter.bData.uid : pIter->sIter.bData.aUid[pIter->sIter.iRow];
pIter->rowInfo.row = tsdbRowFromBlockData(&pIter->sIter.bData, pIter->sIter.iRow);
pIter->sIter.iRow++;
goto _exit;
}
code = tsdbReadDataBlockEx(pReader->pDataFReader, &dataBlk, &pIter->bData);
if (code) goto _err;
for (;;) {
if (pIter->sIter.iSttBlk < taosArrayGetSize(pIter->sIter.aSttBlk)) {
SSttBlk* pSttBlk = taosArrayGet(pIter->sIter.aSttBlk, pIter->sIter.iSttBlk);
pIter->iRow = -1;
goto _find_row;
if (pFilterInfo) {
if (pFilterInfo->flag & TSDB_FILTER_FLAG_BY_VERSION) {
if (pFilterInfo->sver > pSttBlk->maxVer || pFilterInfo->ever < pSttBlk->minVer) {
pIter->sIter.iSttBlk++;
continue;
}
}
pIter->iBlockIdx++;
if (pIter->iBlockIdx >= taosArrayGetSize(pIter->aBlockIdx)) break;
pIter->pBlockIdx = (SBlockIdx*)taosArrayGet(pIter->aBlockIdx, pIter->iBlockIdx);
code = tsdbReadDataBlk(pReader->pDataFReader, pIter->pBlockIdx, &pIter->mBlock);
if (code) goto _err;
pIter->iBlock = -1;
}
pReader->pIter = NULL;
code = tsdbReadSttBlockEx(pIter->sIter.pReader, pIter->sIter.iStt, pSttBlk, &pIter->sIter.bData);
TSDB_CHECK_CODE(code, lino, _exit);
pIter->sIter.iRow = 0;
pIter->sIter.iSttBlk++;
break;
} else if (pIter->type == SNAP_STT_FILE_ITER) {
for (pIter->iSttBlk++; pIter->iSttBlk < taosArrayGetSize(pIter->aSttBlk); pIter->iSttBlk++) {
SSttBlk* pSttBlk = (SSttBlk*)taosArrayGet(pIter->aSttBlk, pIter->iSttBlk);
} else {
pIter->rowInfo = (SRowInfo){0};
goto _exit;
}
}
}
_exit:
if (code) {
tsdbError("%s failed at line %d since %s", __func__, lino, tstrerror(code));
}
return code;
}
if (pSttBlk->minVer > pReader->ever || pSttBlk->maxVer < pReader->sver) continue;
static int32_t tsdbTombFileDataIterNext(STsdbDataIter2* pIter, STsdbFilterInfo* pFilterInfo) {
int32_t code = 0;
int32_t lino = 0;
code = tsdbReadSttBlockEx(pReader->pDataFReader, pIter->iStt, pSttBlk, &pIter->bData);
if (code) goto _err;
for (;;) {
while (pIter->tIter.iDelData < taosArrayGetSize(pIter->tIter.aDelData)) {
SDelData* pDelData = taosArrayGet(pIter->tIter.aDelData, pIter->tIter.iDelData);
pIter->iRow = -1;
goto _find_row;
if (pFilterInfo) {
if (pFilterInfo->flag & TSDB_FILTER_FLAG_BY_VERSION) {
if (pFilterInfo->sver > pDelData->version || pFilterInfo->ever < pDelData->version) {
pIter->tIter.iDelData++;
continue;
}
}
}
pIter->delInfo.delData = *pDelData;
pIter->tIter.iDelData++;
goto _exit;
}
for (;;) {
if (pIter->tIter.iDelIdx < taosArrayGetSize(pIter->tIter.aDelIdx)) {
SDelIdx* pDelIdx = taosArrayGet(pIter->tIter.aDelIdx, pIter->tIter.iDelIdx);
code = tsdbReadDelData(pIter->tIter.pReader, pDelIdx, pIter->tIter.aDelData);
TSDB_CHECK_CODE(code, lino, _exit);
pReader->pIter = NULL;
pIter->delInfo.suid = pDelIdx->suid;
pIter->delInfo.uid = pDelIdx->uid;
pIter->tIter.iDelData = 0;
pIter->tIter.iDelIdx++;
break;
} else {
ASSERT(0);
pIter->delInfo = (SDelInfo){0};
goto _exit;
}
}
}
_out:
pIter = (SFDataIter*)tRBTreeMin(&pReader->rbt);
if (pReader->pIter && pIter) {
int32_t c = tRowInfoCmprFn(&pReader->pIter->rInfo, &pIter->rInfo);
if (c > 0) {
tRBTreePut(&pReader->rbt, (SRBTreeNode*)pReader->pIter);
pReader->pIter = NULL;
} else {
ASSERT(c);
}
_exit:
if (code) {
tsdbError("%s failed at line %d since %s", __func__, lino, tstrerror(code));
}
return code;
}
static int32_t tsdbDataIterNext2(STsdbDataIter2* pIter, STsdbFilterInfo* pFilterInfo) {
int32_t code = 0;
if (pIter->type == TSDB_MEM_TABLE_DATA_ITER) {
ASSERT(0);
return code;
} else if (pIter->type == TSDB_DATA_FILE_DATA_ITER) {
return tsdbDataFileDataIterNext(pIter, pFilterInfo);
} else if (pIter->type == TSDB_STT_FILE_DATA_ITER) {
return tsdbSttFileDataIterNext(pIter, pFilterInfo);
} else if (pIter->type == TSDB_TOMB_FILE_DATA_ITER) {
return tsdbTombFileDataIterNext(pIter, pFilterInfo);
} else {
ASSERT(0);
return code;
}
}
/* get */
// STsdbSnapReader ========================================
struct STsdbSnapReader {
STsdb* pTsdb;
int64_t sver;
int64_t ever;
int8_t type;
uint8_t* aBuf[5];
STsdbFS fs;
TABLEID tbid;
SSkmInfo skmTable;
// timeseries data
int8_t dataDone;
int32_t fid;
SDataFReader* pDataFReader;
STsdbDataIter2* iterList;
STsdbDataIter2* pIter;
SRBTree rbt;
SBlockData bData;
// tombstone data
int8_t delDone;
SDelFReader* pDelFReader;
STsdbDataIter2* pTIter;
SArray* aDelData;
};
static int32_t tsdbSnapReadFileDataStart(STsdbSnapReader* pReader) {
int32_t code = 0;
int32_t lino = 0;
SDFileSet* pSet = taosArraySearch(pReader->fs.aDFileSet, &(SDFileSet){.fid = pReader->fid}, tDFileSetCmprFn, TD_GT);
if (pSet == NULL) {
pReader->fid = INT32_MAX;
goto _exit;
}
pReader->fid = pSet->fid;
tRBTreeCreate(&pReader->rbt, tsdbDataIterCmprFn);
code = tsdbDataFReaderOpen(&pReader->pDataFReader, pReader->pTsdb, pSet);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbOpenDataFileDataIter(pReader->pDataFReader, &pReader->pIter);
TSDB_CHECK_CODE(code, lino, _exit);
if (pReader->pIter) {
// iter to next with filter info (sver, ever)
code = tsdbDataIterNext2(pReader->pIter,
&(STsdbFilterInfo){.flag = TSDB_FILTER_FLAG_BY_VERSION, // flag
.sver = pReader->sver,
.ever = pReader->ever});
TSDB_CHECK_CODE(code, lino, _exit);
if (pReader->pIter->rowInfo.suid || pReader->pIter->rowInfo.uid) {
// add to rbtree
tRBTreePut(&pReader->rbt, &pReader->pIter->rbtn);
// add to iterList
pReader->pIter->next = pReader->iterList;
pReader->iterList = pReader->pIter;
} else {
tsdbCloseDataIter2(pReader->pIter);
}
}
if (pReader->pIter == NULL) {
pReader->pIter = (SFDataIter*)tRBTreeMin(&pReader->rbt);
for (int32_t iStt = 0; iStt < pSet->nSttF; ++iStt) {
code = tsdbOpenSttFileDataIter(pReader->pDataFReader, iStt, &pReader->pIter);
TSDB_CHECK_CODE(code, lino, _exit);
if (pReader->pIter) {
tRBTreeDrop(&pReader->rbt, (SRBTreeNode*)pReader->pIter);
// iter to valid row
code = tsdbDataIterNext2(pReader->pIter,
&(STsdbFilterInfo){.flag = TSDB_FILTER_FLAG_BY_VERSION, // flag
.sver = pReader->sver,
.ever = pReader->ever});
TSDB_CHECK_CODE(code, lino, _exit);
if (pReader->pIter->rowInfo.suid || pReader->pIter->rowInfo.uid) {
// add to rbtree
tRBTreePut(&pReader->rbt, &pReader->pIter->rbtn);
// add to iterList
pReader->pIter->next = pReader->iterList;
pReader->iterList = pReader->pIter;
} else {
tsdbCloseDataIter2(pReader->pIter);
}
}
}
return code;
pReader->pIter = NULL;
_err:
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code));
} else {
tsdbInfo("vgId:%d %s done, fid:%d", TD_VID(pReader->pTsdb->pVnode), __func__, pReader->fid);
}
return code;
}
static SRowInfo* tsdbSnapGetRow(STsdbSnapReader* pReader) {
static void tsdbSnapReadFileDataEnd(STsdbSnapReader* pReader) {
while (pReader->iterList) {
STsdbDataIter2* pIter = pReader->iterList;
pReader->iterList = pIter->next;
tsdbCloseDataIter2(pIter);
}
tsdbDataFReaderClose(&pReader->pDataFReader);
}
static int32_t tsdbSnapReadNextRow(STsdbSnapReader* pReader, SRowInfo** ppRowInfo) {
int32_t code = 0;
int32_t lino = 0;
if (pReader->pIter) {
return &pReader->pIter->rInfo;
} else {
tsdbSnapNextRow(pReader);
code = tsdbDataIterNext2(pReader->pIter, &(STsdbFilterInfo){.flag = TSDB_FILTER_FLAG_BY_VERSION, // flag
.sver = pReader->sver,
.ever = pReader->ever});
TSDB_CHECK_CODE(code, lino, _exit);
if (pReader->pIter->rowInfo.suid == 0 && pReader->pIter->rowInfo.uid == 0) {
pReader->pIter = NULL;
} else {
SRBTreeNode* pNode = tRBTreeMin(&pReader->rbt);
if (pNode) {
int32_t c = tsdbDataIterCmprFn(&pReader->pIter->rbtn, pNode);
if (c > 0) {
tRBTreePut(&pReader->rbt, &pReader->pIter->rbtn);
pReader->pIter = NULL;
} else if (c == 0) {
ASSERT(0);
}
}
}
}
if (pReader->pIter == NULL) {
SRBTreeNode* pNode = tRBTreeMin(&pReader->rbt);
if (pNode) {
tRBTreeDrop(&pReader->rbt, pNode);
pReader->pIter = TSDB_RBTN_TO_DATA_ITER(pNode);
}
}
if (ppRowInfo) {
if (pReader->pIter) {
return &pReader->pIter->rInfo;
*ppRowInfo = &pReader->pIter->rowInfo;
} else {
return NULL;
*ppRowInfo = NULL;
}
}
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code));
}
return code;
}
static int32_t tsdbSnapReadGetRow(STsdbSnapReader* pReader, SRowInfo** ppRowInfo) {
if (pReader->pIter) {
*ppRowInfo = &pReader->pIter->rowInfo;
return 0;
}
return tsdbSnapReadNextRow(pReader, ppRowInfo);
}
static int32_t tsdbSnapCmprData(STsdbSnapReader* pReader, uint8_t** ppData) {
......@@ -318,155 +672,213 @@ _exit:
return code;
}
static int32_t tsdbSnapReadData(STsdbSnapReader* pReader, uint8_t** ppData) {
static int32_t tsdbSnapReadTimeSeriesData(STsdbSnapReader* pReader, uint8_t** ppData) {
int32_t code = 0;
int32_t lino = 0;
STsdb* pTsdb = pReader->pTsdb;
while (true) {
tBlockDataReset(&pReader->bData);
for (;;) {
// start a new file read if need
if (pReader->pDataFReader == NULL) {
code = tsdbSnapReadOpenFile(pReader);
code = tsdbSnapReadFileDataStart(pReader);
TSDB_CHECK_CODE(code, lino, _exit);
}
if (pReader->pDataFReader == NULL) break;
SRowInfo* pRowInfo = tsdbSnapGetRow(pReader);
SRowInfo* pRowInfo;
code = tsdbSnapReadGetRow(pReader, &pRowInfo);
TSDB_CHECK_CODE(code, lino, _exit);
if (pRowInfo == NULL) {
tsdbDataFReaderClose(&pReader->pDataFReader);
tsdbSnapReadFileDataEnd(pReader);
continue;
}
TABLEID id = {.suid = pRowInfo->suid, .uid = pRowInfo->uid};
SBlockData* pBlockData = &pReader->bData;
code = tsdbUpdateTableSchema(pTsdb->pVnode->pMeta, id.suid, id.uid, &pReader->skmTable);
code = tsdbUpdateTableSchema(pTsdb->pVnode->pMeta, pRowInfo->suid, pRowInfo->uid, &pReader->skmTable);
TSDB_CHECK_CODE(code, lino, _exit);
code = tBlockDataInit(pBlockData, &id, pReader->skmTable.pTSchema, NULL, 0);
code = tBlockDataInit(&pReader->bData, (TABLEID*)pRowInfo, pReader->skmTable.pTSchema, NULL, 0);
TSDB_CHECK_CODE(code, lino, _exit);
while (pRowInfo->suid == id.suid && pRowInfo->uid == id.uid) {
code = tBlockDataAppendRow(pBlockData, &pRowInfo->row, NULL, pRowInfo->uid);
do {
if (!TABLE_SAME_SCHEMA(pReader->bData.suid, pReader->bData.uid, pRowInfo->suid, pRowInfo->uid)) break;
if (pReader->bData.uid && pReader->bData.uid != pRowInfo->uid) {
code = tRealloc((uint8_t**)&pReader->bData.aUid, sizeof(int64_t) * (pReader->bData.nRow + 1));
TSDB_CHECK_CODE(code, lino, _exit);
for (int32_t iRow = 0; iRow < pReader->bData.nRow; ++iRow) {
pReader->bData.aUid[iRow] = pReader->bData.uid;
}
pReader->bData.uid = 0;
}
code = tBlockDataAppendRow(&pReader->bData, &pRowInfo->row, NULL, pRowInfo->uid);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbSnapNextRow(pReader);
code = tsdbSnapReadNextRow(pReader, &pRowInfo);
TSDB_CHECK_CODE(code, lino, _exit);
pRowInfo = tsdbSnapGetRow(pReader);
if (pRowInfo == NULL) {
tsdbDataFReaderClose(&pReader->pDataFReader);
break;
}
if (pReader->bData.nRow >= 4096) break;
} while (pRowInfo);
ASSERT(pReader->bData.nRow > 0);
break;
}
if (pReader->bData.nRow > 0) {
code = tsdbSnapCmprData(pReader, ppData);
TSDB_CHECK_CODE(code, lino, _exit);
}
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code));
}
return code;
}
static int32_t tsdbSnapCmprTombData(STsdbSnapReader* pReader, uint8_t** ppData) {
int32_t code = 0;
int32_t lino = 0;
int64_t size = sizeof(TABLEID);
for (int32_t iDelData = 0; iDelData < taosArrayGetSize(pReader->aDelData); ++iDelData) {
size += tPutDelData(NULL, taosArrayGet(pReader->aDelData, iDelData));
}
uint8_t* pData = (uint8_t*)taosMemoryMalloc(sizeof(SSnapDataHdr) + size);
if (pData == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
SSnapDataHdr* pHdr = (SSnapDataHdr*)pData;
pHdr->type = SNAP_DATA_DEL;
pHdr->size = size;
TABLEID* pId = (TABLEID*)(pData + sizeof(SSnapDataHdr));
*pId = pReader->tbid;
size = sizeof(SSnapDataHdr) + sizeof(TABLEID);
for (int32_t iDelData = 0; iDelData < taosArrayGetSize(pReader->aDelData); ++iDelData) {
size += tPutDelData(pData + size, taosArrayGet(pReader->aDelData, iDelData));
}
if (pBlockData->nRow >= 4096) break;
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code));
if (pData) {
taosMemoryFree(pData);
pData = NULL;
}
}
*ppData = pData;
return code;
}
code = tsdbSnapCmprData(pReader, ppData);
TSDB_CHECK_CODE(code, lino, _exit);
static void tsdbSnapReadGetTombData(STsdbSnapReader* pReader, SDelInfo** ppDelInfo) {
if (pReader->pTIter == NULL || (pReader->pTIter->delInfo.suid == 0 && pReader->pTIter->delInfo.uid == 0)) {
*ppDelInfo = NULL;
} else {
*ppDelInfo = &pReader->pTIter->delInfo;
}
}
break;
static int32_t tsdbSnapReadNextTombData(STsdbSnapReader* pReader, SDelInfo** ppDelInfo) {
int32_t code = 0;
int32_t lino = 0;
code = tsdbDataIterNext2(
pReader->pTIter,
&(STsdbFilterInfo){.flag = TSDB_FILTER_FLAG_BY_VERSION, .sver = pReader->sver, .ever = pReader->ever});
TSDB_CHECK_CODE(code, lino, _exit);
if (ppDelInfo) {
tsdbSnapReadGetTombData(pReader, ppDelInfo);
}
_exit:
if (code) {
tsdbError("vgId:%d, %s failed since %s, path:%s", TD_VID(pTsdb->pVnode), __func__, tstrerror(code), pTsdb->path);
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code));
}
return code;
}
static int32_t tsdbSnapReadDel(STsdbSnapReader* pReader, uint8_t** ppData) {
static int32_t tsdbSnapReadTombData(STsdbSnapReader* pReader, uint8_t** ppData) {
int32_t code = 0;
int32_t lino = 0;
STsdb* pTsdb = pReader->pTsdb;
SDelFile* pDelFile = pReader->fs.pDelFile;
STsdb* pTsdb = pReader->pTsdb;
// open tombstone data iter if need
if (pReader->pDelFReader == NULL) {
if (pDelFile == NULL) {
goto _exit;
}
if (pReader->fs.pDelFile == NULL) goto _exit;
// open
code = tsdbDelFReaderOpen(&pReader->pDelFReader, pDelFile, pTsdb);
code = tsdbDelFReaderOpen(&pReader->pDelFReader, pReader->fs.pDelFile, pTsdb);
TSDB_CHECK_CODE(code, lino, _exit);
// read index
code = tsdbReadDelIdx(pReader->pDelFReader, pReader->aDelIdx);
code = tsdbOpenTombFileDataIter(pReader->pDelFReader, &pReader->pTIter);
TSDB_CHECK_CODE(code, lino, _exit);
pReader->iDelIdx = 0;
if (pReader->pTIter) {
code = tsdbSnapReadNextTombData(pReader, NULL);
TSDB_CHECK_CODE(code, lino, _exit);
}
}
while (true) {
if (pReader->iDelIdx >= taosArrayGetSize(pReader->aDelIdx)) {
tsdbDelFReaderClose(&pReader->pDelFReader);
break;
}
// loop to get tombstone data
SDelInfo* pDelInfo;
tsdbSnapReadGetTombData(pReader, &pDelInfo);
SDelIdx* pDelIdx = (SDelIdx*)taosArrayGet(pReader->aDelIdx, pReader->iDelIdx);
if (pDelInfo == NULL) goto _exit;
pReader->iDelIdx++;
pReader->tbid = *(TABLEID*)pDelInfo;
code = tsdbReadDelData(pReader->pDelFReader, pDelIdx, pReader->aDelData);
if (pReader->aDelData) {
taosArrayClear(pReader->aDelData);
} else if ((pReader->aDelData = taosArrayInit(16, sizeof(SDelData))) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
int32_t size = 0;
for (int32_t iDelData = 0; iDelData < taosArrayGetSize(pReader->aDelData); iDelData++) {
SDelData* pDelData = (SDelData*)taosArrayGet(pReader->aDelData, iDelData);
if (pDelData->version >= pReader->sver && pDelData->version <= pReader->ever) {
size += tPutDelData(NULL, pDelData);
}
}
if (size == 0) continue;
// org data
size = sizeof(TABLEID) + size;
*ppData = taosMemoryMalloc(sizeof(SSnapDataHdr) + size);
if (*ppData == NULL) {
while (pDelInfo && pDelInfo->suid == pReader->tbid.suid && pDelInfo->uid == pReader->tbid.uid) {
if (taosArrayPush(pReader->aDelData, &pDelInfo->delData) < 0) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
SSnapDataHdr* pHdr = (SSnapDataHdr*)(*ppData);
pHdr->type = SNAP_DATA_DEL;
pHdr->size = size;
TABLEID* pId = (TABLEID*)(&pHdr[1]);
pId->suid = pDelIdx->suid;
pId->uid = pDelIdx->uid;
int32_t n = sizeof(SSnapDataHdr) + sizeof(TABLEID);
for (int32_t iDelData = 0; iDelData < taosArrayGetSize(pReader->aDelData); iDelData++) {
SDelData* pDelData = (SDelData*)taosArrayGet(pReader->aDelData, iDelData);
if (pDelData->version < pReader->sver) continue;
if (pDelData->version > pReader->ever) continue;
n += tPutDelData((*ppData) + n, pDelData);
}
tsdbInfo("vgId:%d, vnode snapshot tsdb read del data for %s, suid:%" PRId64 " uid:%" PRId64 " size:%d",
TD_VID(pTsdb->pVnode), pTsdb->path, pDelIdx->suid, pDelIdx->uid, size);
code = tsdbSnapReadNextTombData(pReader, &pDelInfo);
TSDB_CHECK_CODE(code, lino, _exit);
}
break;
// encode tombstone data
if (taosArrayGetSize(pReader->aDelData) > 0) {
code = tsdbSnapCmprTombData(pReader, ppData);
TSDB_CHECK_CODE(code, lino, _exit);
}
_exit:
if (code) {
tsdbError("vgId:%d, %s failed since %s, path:%s", TD_VID(pTsdb->pVnode), __func__, tstrerror(code), pTsdb->path);
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code));
} else {
tsdbDebug("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__);
}
return code;
}
int32_t tsdbSnapReaderOpen(STsdb* pTsdb, int64_t sver, int64_t ever, int8_t type, STsdbSnapReader** ppReader) {
int32_t code = 0;
int32_t lino = 0;
STsdbSnapReader* pReader = NULL;
int32_t code = 0;
int32_t lino = 0;
// alloc
pReader = (STsdbSnapReader*)taosMemoryCalloc(1, sizeof(*pReader));
STsdbSnapReader* pReader = (STsdbSnapReader*)taosMemoryCalloc(1, sizeof(*pReader));
if (pReader == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
......@@ -476,118 +888,80 @@ int32_t tsdbSnapReaderOpen(STsdb* pTsdb, int64_t sver, int64_t ever, int8_t type
pReader->ever = ever;
pReader->type = type;
code = taosThreadRwlockRdlock(&pTsdb->rwLock);
if (code) {
code = TAOS_SYSTEM_ERROR(code);
TSDB_CHECK_CODE(code, lino, _exit);
}
taosThreadRwlockRdlock(&pTsdb->rwLock);
code = tsdbFSRef(pTsdb, &pReader->fs);
if (code) {
taosThreadRwlockUnlock(&pTsdb->rwLock);
TSDB_CHECK_CODE(code, lino, _exit);
}
taosThreadRwlockUnlock(&pTsdb->rwLock);
code = taosThreadRwlockUnlock(&pTsdb->rwLock);
if (code) {
code = TAOS_SYSTEM_ERROR(code);
TSDB_CHECK_CODE(code, lino, _exit);
}
// data
// init
pReader->fid = INT32_MIN;
for (int32_t iIter = 0; iIter < sizeof(pReader->aFDataIter) / sizeof(pReader->aFDataIter[0]); iIter++) {
SFDataIter* pIter = &pReader->aFDataIter[iIter];
if (iIter == 0) {
pIter->aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx));
if (pIter->aBlockIdx == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
} else {
pIter->aSttBlk = taosArrayInit(0, sizeof(SSttBlk));
if (pIter->aSttBlk == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
}
code = tBlockDataCreate(&pIter->bData);
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tBlockDataCreate(&pReader->bData);
TSDB_CHECK_CODE(code, lino, _exit);
// del
pReader->aDelIdx = taosArrayInit(0, sizeof(SDelIdx));
if (pReader->aDelIdx == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
pReader->aDelData = taosArrayInit(0, sizeof(SDelData));
if (pReader->aDelData == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
_exit:
if (code) {
tsdbError("vgId:%d, %s failed at line %d since %s, TSDB path: %s", TD_VID(pTsdb->pVnode), __func__, lino,
tstrerror(code), pTsdb->path);
*ppReader = NULL;
tsdbError("vgId:%d %s failed at line %d since %s, sver:%" PRId64 " ever:%" PRId64 " type:%d", TD_VID(pTsdb->pVnode),
__func__, lino, tstrerror(code), sver, ever, type);
if (pReader) {
taosArrayDestroy(pReader->aDelData);
taosArrayDestroy(pReader->aDelIdx);
tBlockDataDestroy(&pReader->bData, 1);
tsdbFSDestroy(&pReader->fs);
tsdbFSUnref(pTsdb, &pReader->fs);
taosMemoryFree(pReader);
pReader = NULL;
}
} else {
*ppReader = pReader;
tsdbInfo("vgId:%d, vnode snapshot tsdb reader opened for %s", TD_VID(pTsdb->pVnode), pTsdb->path);
tsdbInfo("vgId:%d %s done, sver:%" PRId64 " ever:%" PRId64 " type:%d", TD_VID(pTsdb->pVnode), __func__, sver, ever,
type);
}
*ppReader = pReader;
return code;
}
int32_t tsdbSnapReaderClose(STsdbSnapReader** ppReader) {
int32_t code = 0;
STsdbSnapReader* pReader = *ppReader;
// data
if (pReader->pDataFReader) tsdbDataFReaderClose(&pReader->pDataFReader);
for (int32_t iIter = 0; iIter < sizeof(pReader->aFDataIter) / sizeof(pReader->aFDataIter[0]); iIter++) {
SFDataIter* pIter = &pReader->aFDataIter[iIter];
int32_t code = 0;
int32_t lino = 0;
if (iIter == 0) {
taosArrayDestroy(pIter->aBlockIdx);
tMapDataClear(&pIter->mBlock);
} else {
taosArrayDestroy(pIter->aSttBlk);
}
STsdbSnapReader* pReader = *ppReader;
STsdb* pTsdb = pReader->pTsdb;
tBlockDataDestroy(&pIter->bData, 1);
// tombstone
if (pReader->pTIter) {
tsdbCloseDataIter2(pReader->pTIter);
pReader->pTIter = NULL;
}
if (pReader->pDelFReader) {
tsdbDelFReaderClose(&pReader->pDelFReader);
}
taosArrayDestroy(pReader->aDelData);
// timeseries
while (pReader->iterList) {
STsdbDataIter2* pIter = pReader->iterList;
pReader->iterList = pIter->next;
tsdbCloseDataIter2(pIter);
}
if (pReader->pDataFReader) {
tsdbDataFReaderClose(&pReader->pDataFReader);
}
tBlockDataDestroy(&pReader->bData, 1);
tDestroyTSchema(pReader->skmTable.pTSchema);
// del
if (pReader->pDelFReader) tsdbDelFReaderClose(&pReader->pDelFReader);
taosArrayDestroy(pReader->aDelIdx);
taosArrayDestroy(pReader->aDelData);
// other
tDestroyTSchema(pReader->skmTable.pTSchema);
tsdbFSUnref(pReader->pTsdb, &pReader->fs);
tsdbInfo("vgId:%d, vnode snapshot tsdb reader closed for %s", TD_VID(pReader->pTsdb->pVnode), pReader->pTsdb->path);
for (int32_t iBuf = 0; iBuf < sizeof(pReader->aBuf) / sizeof(pReader->aBuf[0]); iBuf++) {
tFree(pReader->aBuf[iBuf]);
}
taosMemoryFree(pReader);
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code));
} else {
tsdbDebug("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__);
}
*ppReader = NULL;
return code;
}
......@@ -600,7 +974,7 @@ int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData) {
// read data file
if (!pReader->dataDone) {
code = tsdbSnapReadData(pReader, ppData);
code = tsdbSnapReadTimeSeriesData(pReader, ppData);
TSDB_CHECK_CODE(code, lino, _exit);
if (*ppData) {
goto _exit;
......@@ -611,7 +985,7 @@ int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData) {
// read del file
if (!pReader->delDone) {
code = tsdbSnapReadDel(pReader, ppData);
code = tsdbSnapReadTombData(pReader, ppData);
TSDB_CHECK_CODE(code, lino, _exit);
if (*ppData) {
goto _exit;
......@@ -622,22 +996,18 @@ int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData) {
_exit:
if (code) {
tsdbError("vgId:%d, %s failed since %s, path:%s", TD_VID(pReader->pTsdb->pVnode), __func__, tstrerror(code),
pReader->pTsdb->path);
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code));
} else {
tsdbDebug("vgId:%d, %s done, path:%s", TD_VID(pReader->pTsdb->pVnode), __func__, pReader->pTsdb->path);
tsdbDebug("vgId:%d %s done", TD_VID(pReader->pTsdb->pVnode), __func__);
}
return code;
}
// STsdbSnapWriter ========================================
struct STsdbSnapWriter {
STsdb* pTsdb;
int64_t sver;
int64_t ever;
STsdbFS fs;
// config
STsdb* pTsdb;
int64_t sver;
int64_t ever;
int32_t minutes;
int8_t precision;
int32_t minRow;
......@@ -646,641 +1016,816 @@ struct STsdbSnapWriter {
int64_t commitID;
uint8_t* aBuf[5];
// for data file
SBlockData bData;
int32_t fid;
TABLEID id;
SSkmInfo skmTable;
struct {
SDataFReader* pReader;
SArray* aBlockIdx;
int32_t iBlockIdx;
SBlockIdx* pBlockIdx;
SMapData mDataBlk;
int32_t iDataBlk;
SBlockData bData;
int32_t iRow;
} dReader;
struct {
SDataFWriter* pWriter;
SArray* aBlockIdx;
SMapData mDataBlk;
SArray* aSttBlk;
SBlockData bData;
SBlockData sData;
} dWriter;
// for del file
SDelFReader* pDelFReader;
STsdbFS fs;
TABLEID tbid;
// time-series data
SBlockData inData;
int32_t fid;
SSkmInfo skmTable;
/* reader */
SDataFReader* pDataFReader;
STsdbDataIter2* iterList;
STsdbDataIter2* pDIter;
STsdbDataIter2* pSIter;
SRBTree rbt; // SRBTree<STsdbDataIter2>
/* writer */
SDataFWriter* pDataFWriter;
SArray* aBlockIdx;
SMapData mDataBlk; // SMapData<SDataBlk>
SArray* aSttBlk; // SArray<SSttBlk>
SBlockData bData;
SBlockData sData;
// tombstone data
/* reader */
SDelFReader* pDelFReader;
STsdbDataIter2* pTIter;
/* writer */
SDelFWriter* pDelFWriter;
int32_t iDelIdx;
SArray* aDelIdxR;
SArray* aDelIdx;
SArray* aDelData;
SArray* aDelIdxW;
};
// SNAP_DATA_TSDB
extern int32_t tsdbWriteDataBlock(SDataFWriter* pWriter, SBlockData* pBlockData, SMapData* mDataBlk, int8_t cmprAlg);
extern int32_t tsdbWriteSttBlock(SDataFWriter* pWriter, SBlockData* pBlockData, SArray* aSttBlk, int8_t cmprAlg);
static int32_t tsdbSnapNextTableData(STsdbSnapWriter* pWriter) {
static int32_t tsdbSnapWriteTableDataStart(STsdbSnapWriter* pWriter, TABLEID* pId) {
int32_t code = 0;
int32_t lino = 0;
if (pId) {
pWriter->tbid = *pId;
} else {
pWriter->tbid = (TABLEID){INT64_MAX, INT64_MAX};
}
if (pWriter->pDIter) {
STsdbDataIter2* pIter = pWriter->pDIter;
// assert last table data end
ASSERT(pIter->dIter.iRow >= pIter->dIter.bData.nRow);
ASSERT(pIter->dIter.iDataBlk >= pIter->dIter.mDataBlk.nItem);
for (;;) {
if (pIter->dIter.iBlockIdx >= taosArrayGetSize(pIter->dIter.aBlockIdx)) {
pWriter->pDIter = NULL;
break;
}
SBlockIdx* pBlockIdx = (SBlockIdx*)taosArrayGet(pIter->dIter.aBlockIdx, pIter->dIter.iBlockIdx);
int32_t c = tTABLEIDCmprFn(pBlockIdx, &pWriter->tbid);
if (c < 0) {
code = tsdbReadDataBlk(pIter->dIter.pReader, pBlockIdx, &pIter->dIter.mDataBlk);
TSDB_CHECK_CODE(code, lino, _exit);
SBlockIdx* pNewBlockIdx = taosArrayReserve(pWriter->aBlockIdx, 1);
if (pNewBlockIdx == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
pNewBlockIdx->suid = pBlockIdx->suid;
pNewBlockIdx->uid = pBlockIdx->uid;
code = tsdbWriteDataBlk(pWriter->pDataFWriter, &pIter->dIter.mDataBlk, pNewBlockIdx);
TSDB_CHECK_CODE(code, lino, _exit);
pIter->dIter.iBlockIdx++;
} else if (c == 0) {
code = tsdbReadDataBlk(pIter->dIter.pReader, pBlockIdx, &pIter->dIter.mDataBlk);
TSDB_CHECK_CODE(code, lino, _exit);
pIter->dIter.iDataBlk = 0;
pIter->dIter.iBlockIdx++;
break;
} else {
pIter->dIter.iDataBlk = pIter->dIter.mDataBlk.nItem;
break;
}
}
}
if (pId) {
code = tsdbUpdateTableSchema(pWriter->pTsdb->pVnode->pMeta, pId->suid, pId->uid, &pWriter->skmTable);
TSDB_CHECK_CODE(code, lino, _exit);
tMapDataReset(&pWriter->mDataBlk);
ASSERT(pWriter->dReader.iRow >= pWriter->dReader.bData.nRow);
code = tBlockDataInit(&pWriter->bData, pId, pWriter->skmTable.pTSchema, NULL, 0);
TSDB_CHECK_CODE(code, lino, _exit);
}
if (pWriter->dReader.iBlockIdx < taosArrayGetSize(pWriter->dReader.aBlockIdx)) {
pWriter->dReader.pBlockIdx = (SBlockIdx*)taosArrayGet(pWriter->dReader.aBlockIdx, pWriter->dReader.iBlockIdx);
if (!TABLE_SAME_SCHEMA(pWriter->tbid.suid, pWriter->tbid.uid, pWriter->sData.suid, pWriter->sData.uid)) {
if ((pWriter->sData.nRow > 0)) {
code = tsdbWriteSttBlock(pWriter->pDataFWriter, &pWriter->sData, pWriter->aSttBlk, pWriter->cmprAlg);
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tsdbReadDataBlk(pWriter->dReader.pReader, pWriter->dReader.pBlockIdx, &pWriter->dReader.mDataBlk);
if (code) goto _exit;
if (pId) {
TABLEID id = {.suid = pWriter->tbid.suid, .uid = pWriter->tbid.suid ? 0 : pWriter->tbid.uid};
code = tBlockDataInit(&pWriter->sData, &id, pWriter->skmTable.pTSchema, NULL, 0);
TSDB_CHECK_CODE(code, lino, _exit);
}
}
pWriter->dReader.iBlockIdx++;
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code));
} else {
pWriter->dReader.pBlockIdx = NULL;
tMapDataReset(&pWriter->dReader.mDataBlk);
tsdbTrace("vgId:%d %s done, suid:%" PRId64 " uid:%" PRId64, TD_VID(pWriter->pTsdb->pVnode), __func__,
pWriter->tbid.suid, pWriter->tbid.uid);
}
return code;
}
static int32_t tsdbSnapWriteTableRowImpl(STsdbSnapWriter* pWriter, TSDBROW* pRow) {
int32_t code = 0;
int32_t lino = 0;
code = tBlockDataAppendRow(&pWriter->bData, pRow, pWriter->skmTable.pTSchema, pWriter->tbid.uid);
TSDB_CHECK_CODE(code, lino, _exit);
if (pWriter->bData.nRow >= pWriter->maxRow) {
code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg);
TSDB_CHECK_CODE(code, lino, _exit);
}
pWriter->dReader.iDataBlk = 0; // point to the next one
tBlockDataReset(&pWriter->dReader.bData);
pWriter->dReader.iRow = 0;
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code));
}
return code;
}
static int32_t tsdbSnapWriteCopyData(STsdbSnapWriter* pWriter, TABLEID* pId) {
static int32_t tsdbSnapWriteTableRow(STsdbSnapWriter* pWriter, TSDBROW* pRow) {
int32_t code = 0;
int32_t lino = 0;
while (true) {
if (pWriter->dReader.pBlockIdx == NULL) break;
if (tTABLEIDCmprFn(pWriter->dReader.pBlockIdx, pId) >= 0) break;
TSDBKEY inKey = pRow ? TSDBROW_KEY(pRow) : TSDBKEY_MAX;
if (pWriter->pDIter == NULL || (pWriter->pDIter->dIter.iRow >= pWriter->pDIter->dIter.bData.nRow &&
pWriter->pDIter->dIter.iDataBlk >= pWriter->pDIter->dIter.mDataBlk.nItem)) {
goto _write_row;
} else {
for (;;) {
while (pWriter->pDIter->dIter.iRow < pWriter->pDIter->dIter.bData.nRow) {
TSDBROW row = tsdbRowFromBlockData(&pWriter->pDIter->dIter.bData, pWriter->pDIter->dIter.iRow);
int32_t c = tsdbKeyCmprFn(&inKey, &TSDBROW_KEY(&row));
if (c < 0) {
goto _write_row;
} else if (c > 0) {
code = tsdbSnapWriteTableRowImpl(pWriter, &row);
TSDB_CHECK_CODE(code, lino, _exit);
pWriter->pDIter->dIter.iRow++;
} else {
ASSERT(0);
}
}
SBlockIdx blkIdx = *pWriter->dReader.pBlockIdx;
code = tsdbWriteDataBlk(pWriter->dWriter.pWriter, &pWriter->dReader.mDataBlk, &blkIdx);
if (code) goto _exit;
for (;;) {
if (pWriter->pDIter->dIter.iDataBlk >= pWriter->pDIter->dIter.mDataBlk.nItem) goto _write_row;
if (taosArrayPush(pWriter->dWriter.aBlockIdx, &blkIdx) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
// FIXME: Here can be slow, use array instead
SDataBlk dataBlk;
tMapDataGetItemByIdx(&pWriter->pDIter->dIter.mDataBlk, pWriter->pDIter->dIter.iDataBlk, &dataBlk, tGetDataBlk);
int32_t c = tDataBlkCmprFn(&dataBlk, &(SDataBlk){.minKey = inKey, .maxKey = inKey});
if (c > 0) {
goto _write_row;
} else if (c < 0) {
if (pWriter->bData.nRow > 0) {
code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg);
TSDB_CHECK_CODE(code, lino, _exit);
}
tMapDataPutItem(&pWriter->pDIter->dIter.mDataBlk, &dataBlk, tPutDataBlk);
pWriter->pDIter->dIter.iDataBlk++;
} else {
code = tsdbReadDataBlockEx(pWriter->pDataFReader, &dataBlk, &pWriter->pDIter->dIter.bData);
TSDB_CHECK_CODE(code, lino, _exit);
pWriter->pDIter->dIter.iRow = 0;
pWriter->pDIter->dIter.iDataBlk++;
break;
}
}
}
}
code = tsdbSnapNextTableData(pWriter);
if (code) goto _exit;
_write_row:
if (pRow) {
code = tsdbSnapWriteTableRowImpl(pWriter, pRow);
TSDB_CHECK_CODE(code, lino, _exit);
}
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code));
}
return code;
}
static int32_t tsdbSnapWriteTableDataStart(STsdbSnapWriter* pWriter, TABLEID* pId) {
static int32_t tsdbSnapWriteTableDataEnd(STsdbSnapWriter* pWriter) {
int32_t code = 0;
int32_t lino = 0;
// write a NULL row to end current table data write
code = tsdbSnapWriteTableRow(pWriter, NULL);
TSDB_CHECK_CODE(code, lino, _exit);
if (pWriter->bData.nRow > 0) {
if (pWriter->bData.nRow < pWriter->minRow) {
ASSERT(TABLE_SAME_SCHEMA(pWriter->sData.suid, pWriter->sData.uid, pWriter->tbid.suid, pWriter->tbid.uid));
for (int32_t iRow = 0; iRow < pWriter->bData.nRow; iRow++) {
code =
tBlockDataAppendRow(&pWriter->sData, &tsdbRowFromBlockData(&pWriter->bData, iRow), NULL, pWriter->tbid.uid);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbSnapWriteCopyData(pWriter, pId);
if (code) goto _err;
if (pWriter->sData.nRow >= pWriter->maxRow) {
code = tsdbWriteSttBlock(pWriter->pDataFWriter, &pWriter->sData, pWriter->aSttBlk, pWriter->cmprAlg);
TSDB_CHECK_CODE(code, lino, _exit);
}
}
pWriter->id.suid = pId->suid;
pWriter->id.uid = pId->uid;
tBlockDataClear(&pWriter->bData);
} else {
code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg);
TSDB_CHECK_CODE(code, lino, _exit);
}
}
code = tsdbUpdateTableSchema(pWriter->pTsdb->pVnode->pMeta, pId->suid, pId->uid, &pWriter->skmTable);
if (code) goto _err;
if (pWriter->mDataBlk.nItem) {
SBlockIdx* pBlockIdx = taosArrayReserve(pWriter->aBlockIdx, 1);
if (pBlockIdx == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
tMapDataReset(&pWriter->dWriter.mDataBlk);
code = tBlockDataInit(&pWriter->dWriter.bData, pId, pWriter->skmTable.pTSchema, NULL, 0);
if (code) goto _err;
pBlockIdx->suid = pWriter->tbid.suid;
pBlockIdx->uid = pWriter->tbid.uid;
return code;
code = tsdbWriteDataBlk(pWriter->pDataFWriter, &pWriter->mDataBlk, pBlockIdx);
TSDB_CHECK_CODE(code, lino, _exit);
}
_err:
tsdbError("vgId:%d, %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code));
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code));
}
return code;
}
static int32_t tsdbSnapWriteTableDataEnd(STsdbSnapWriter* pWriter) {
static int32_t tsdbSnapWriteFileDataStart(STsdbSnapWriter* pWriter, int32_t fid) {
int32_t code = 0;
int32_t lino = 0;
if (pWriter->id.suid == 0 && pWriter->id.uid == 0) return code;
ASSERT(pWriter->pDataFWriter == NULL && pWriter->fid < fid);
int32_t c = 1;
if (pWriter->dReader.pBlockIdx) {
c = tTABLEIDCmprFn(pWriter->dReader.pBlockIdx, &pWriter->id);
ASSERT(c >= 0);
}
STsdb* pTsdb = pWriter->pTsdb;
pWriter->fid = fid;
pWriter->tbid = (TABLEID){0};
SDFileSet* pSet = taosArraySearch(pWriter->fs.aDFileSet, &(SDFileSet){.fid = fid}, tDFileSetCmprFn, TD_EQ);
// open reader
pWriter->pDataFReader = NULL;
pWriter->iterList = NULL;
pWriter->pDIter = NULL;
pWriter->pSIter = NULL;
tRBTreeCreate(&pWriter->rbt, tsdbDataIterCmprFn);
if (pSet) {
code = tsdbDataFReaderOpen(&pWriter->pDataFReader, pTsdb, pSet);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbOpenDataFileDataIter(pWriter->pDataFReader, &pWriter->pDIter);
TSDB_CHECK_CODE(code, lino, _exit);
if (pWriter->pDIter) {
pWriter->pDIter->next = pWriter->iterList;
pWriter->iterList = pWriter->pDIter;
}
if (c == 0) {
SBlockData* pBData = &pWriter->dWriter.bData;
for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) {
code = tsdbOpenSttFileDataIter(pWriter->pDataFReader, iStt, &pWriter->pSIter);
TSDB_CHECK_CODE(code, lino, _exit);
for (; pWriter->dReader.iRow < pWriter->dReader.bData.nRow; pWriter->dReader.iRow++) {
TSDBROW row = tsdbRowFromBlockData(&pWriter->dReader.bData, pWriter->dReader.iRow);
if (pWriter->pSIter) {
code = tsdbSttFileDataIterNext(pWriter->pSIter, NULL);
TSDB_CHECK_CODE(code, lino, _exit);
code = tBlockDataAppendRow(pBData, &row, NULL, pWriter->id.uid);
if (code) goto _err;
// add to tree
tRBTreePut(&pWriter->rbt, &pWriter->pSIter->rbtn);
if (pBData->nRow >= pWriter->maxRow) {
code = tsdbWriteDataBlock(pWriter->dWriter.pWriter, pBData, &pWriter->dWriter.mDataBlk, pWriter->cmprAlg);
if (code) goto _err;
// add to list
pWriter->pSIter->next = pWriter->iterList;
pWriter->iterList = pWriter->pSIter;
}
}
code = tsdbWriteDataBlock(pWriter->dWriter.pWriter, pBData, &pWriter->dWriter.mDataBlk, pWriter->cmprAlg);
if (code) goto _err;
pWriter->pSIter = NULL;
}
// open writer
SDiskID diskId;
if (pSet) {
diskId = pSet->diskId;
} else {
tfsAllocDisk(pTsdb->pVnode->pTfs, 0 /*TODO*/, &diskId);
tfsMkdirRecurAt(pTsdb->pVnode->pTfs, pTsdb->path, diskId);
}
SDFileSet wSet = {.diskId = diskId,
.fid = fid,
.pHeadF = &(SHeadFile){.commitID = pWriter->commitID},
.pDataF = (pSet) ? pSet->pDataF : &(SDataFile){.commitID = pWriter->commitID},
.pSmaF = (pSet) ? pSet->pSmaF : &(SSmaFile){.commitID = pWriter->commitID},
.nSttF = 1,
.aSttF = {&(SSttFile){.commitID = pWriter->commitID}}};
code = tsdbDataFWriterOpen(&pWriter->pDataFWriter, pTsdb, &wSet);
TSDB_CHECK_CODE(code, lino, _exit);
for (; pWriter->dReader.iDataBlk < pWriter->dReader.mDataBlk.nItem; pWriter->dReader.iDataBlk++) {
SDataBlk dataBlk;
tMapDataGetItemByIdx(&pWriter->dReader.mDataBlk, pWriter->dReader.iDataBlk, &dataBlk, tGetDataBlk);
if (pWriter->aBlockIdx) {
taosArrayClear(pWriter->aBlockIdx);
} else if ((pWriter->aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx))) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tMapDataPutItem(&pWriter->dWriter.mDataBlk, &dataBlk, tPutDataBlk);
if (code) goto _err;
}
tMapDataReset(&pWriter->mDataBlk);
code = tsdbSnapNextTableData(pWriter);
if (code) goto _err;
if (pWriter->aSttBlk) {
taosArrayClear(pWriter->aSttBlk);
} else if ((pWriter->aSttBlk = taosArrayInit(0, sizeof(SSttBlk))) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
if (pWriter->dWriter.mDataBlk.nItem) {
SBlockIdx blockIdx = {.suid = pWriter->id.suid, .uid = pWriter->id.uid};
code = tsdbWriteDataBlk(pWriter->dWriter.pWriter, &pWriter->dWriter.mDataBlk, &blockIdx);
tBlockDataReset(&pWriter->bData);
tBlockDataReset(&pWriter->sData);
if (taosArrayPush(pWriter->dWriter.aBlockIdx, &blockIdx) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s, fid:%d", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code),
fid);
} else {
tsdbDebug("vgId:%d %s done, fid:%d", TD_VID(pTsdb->pVnode), __func__, fid);
}
return code;
}
static int32_t tsdbSnapWriteTableData(STsdbSnapWriter* pWriter, SRowInfo* pRowInfo) {
int32_t code = 0;
int32_t lino = 0;
// switch to new table if need
if (pRowInfo == NULL || pRowInfo->uid != pWriter->tbid.uid) {
if (pWriter->tbid.uid) {
code = tsdbSnapWriteTableDataEnd(pWriter);
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tsdbSnapWriteTableDataStart(pWriter, (TABLEID*)pRowInfo);
TSDB_CHECK_CODE(code, lino, _exit);
}
pWriter->id.suid = 0;
pWriter->id.uid = 0;
if (pRowInfo == NULL) goto _exit;
return code;
code = tsdbSnapWriteTableRow(pWriter, &pRowInfo->row);
TSDB_CHECK_CODE(code, lino, _exit);
_err:
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code));
}
return code;
}
static int32_t tsdbSnapWriteOpenFile(STsdbSnapWriter* pWriter, int32_t fid) {
static int32_t tsdbSnapWriteNextRow(STsdbSnapWriter* pWriter, SRowInfo** ppRowInfo) {
int32_t code = 0;
STsdb* pTsdb = pWriter->pTsdb;
ASSERT(pWriter->dWriter.pWriter == NULL);
int32_t lino = 0;
pWriter->fid = fid;
pWriter->id = (TABLEID){0};
SDFileSet* pSet = taosArraySearch(pWriter->fs.aDFileSet, &(SDFileSet){.fid = fid}, tDFileSetCmprFn, TD_EQ);
if (pWriter->pSIter) {
code = tsdbDataIterNext2(pWriter->pSIter, NULL);
TSDB_CHECK_CODE(code, lino, _exit);
// Reader
if (pSet) {
code = tsdbDataFReaderOpen(&pWriter->dReader.pReader, pWriter->pTsdb, pSet);
if (code) goto _err;
if (pWriter->pSIter->rowInfo.suid == 0 && pWriter->pSIter->rowInfo.uid == 0) {
pWriter->pSIter = NULL;
} else {
SRBTreeNode* pNode = tRBTreeMin(&pWriter->rbt);
if (pNode) {
int32_t c = tsdbDataIterCmprFn(&pWriter->pSIter->rbtn, pNode);
if (c > 0) {
tRBTreePut(&pWriter->rbt, &pWriter->pSIter->rbtn);
pWriter->pSIter = NULL;
} else if (c == 0) {
ASSERT(0);
}
}
}
}
code = tsdbReadBlockIdx(pWriter->dReader.pReader, pWriter->dReader.aBlockIdx);
if (code) goto _err;
} else {
ASSERT(pWriter->dReader.pReader == NULL);
taosArrayClear(pWriter->dReader.aBlockIdx);
}
pWriter->dReader.iBlockIdx = 0; // point to the next one
code = tsdbSnapNextTableData(pWriter);
if (code) goto _err;
// Writer
SHeadFile fHead = {.commitID = pWriter->commitID};
SDataFile fData = {.commitID = pWriter->commitID};
SSmaFile fSma = {.commitID = pWriter->commitID};
SSttFile fStt = {.commitID = pWriter->commitID};
SDFileSet wSet = {.fid = pWriter->fid, .pHeadF = &fHead, .pDataF = &fData, .pSmaF = &fSma};
if (pSet) {
wSet.diskId = pSet->diskId;
fData = *pSet->pDataF;
fSma = *pSet->pSmaF;
for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) {
wSet.aSttF[iStt] = pSet->aSttF[iStt];
if (pWriter->pSIter == NULL) {
SRBTreeNode* pNode = tRBTreeMin(&pWriter->rbt);
if (pNode) {
tRBTreeDrop(&pWriter->rbt, pNode);
pWriter->pSIter = TSDB_RBTN_TO_DATA_ITER(pNode);
}
wSet.nSttF = pSet->nSttF + 1; // TODO: fix pSet->nSttF == pTsdb->maxFile
} else {
SDiskID did = {0};
tfsAllocDisk(pTsdb->pVnode->pTfs, 0, &did);
tfsMkdirRecurAt(pTsdb->pVnode->pTfs, pTsdb->path, did);
wSet.diskId = did;
wSet.nSttF = 1;
}
wSet.aSttF[wSet.nSttF - 1] = &fStt;
code = tsdbDataFWriterOpen(&pWriter->dWriter.pWriter, pWriter->pTsdb, &wSet);
if (code) goto _err;
taosArrayClear(pWriter->dWriter.aBlockIdx);
tMapDataReset(&pWriter->dWriter.mDataBlk);
taosArrayClear(pWriter->dWriter.aSttBlk);
tBlockDataReset(&pWriter->dWriter.bData);
tBlockDataReset(&pWriter->dWriter.sData);
}
return code;
if (ppRowInfo) {
if (pWriter->pSIter) {
*ppRowInfo = &pWriter->pSIter->rowInfo;
} else {
*ppRowInfo = NULL;
}
}
_err:
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code));
}
return code;
}
static int32_t tsdbSnapWriteCloseFile(STsdbSnapWriter* pWriter) {
static int32_t tsdbSnapWriteGetRow(STsdbSnapWriter* pWriter, SRowInfo** ppRowInfo) {
int32_t code = 0;
int32_t lino = 0;
ASSERT(pWriter->dWriter.pWriter);
code = tsdbSnapWriteTableDataEnd(pWriter);
if (code) goto _err;
// copy remain table data
TABLEID id = {.suid = INT64_MAX, .uid = INT64_MAX};
code = tsdbSnapWriteCopyData(pWriter, &id);
if (code) goto _err;
code =
tsdbWriteSttBlock(pWriter->dWriter.pWriter, &pWriter->dWriter.sData, pWriter->dWriter.aSttBlk, pWriter->cmprAlg);
if (code) goto _err;
// Indices
code = tsdbWriteBlockIdx(pWriter->dWriter.pWriter, pWriter->dWriter.aBlockIdx);
if (code) goto _err;
code = tsdbWriteSttBlk(pWriter->dWriter.pWriter, pWriter->dWriter.aSttBlk);
if (code) goto _err;
code = tsdbUpdateDFileSetHeader(pWriter->dWriter.pWriter);
if (code) goto _err;
code = tsdbFSUpsertFSet(&pWriter->fs, &pWriter->dWriter.pWriter->wSet);
if (code) goto _err;
code = tsdbDataFWriterClose(&pWriter->dWriter.pWriter, 1);
if (code) goto _err;
if (pWriter->dReader.pReader) {
code = tsdbDataFReaderClose(&pWriter->dReader.pReader);
if (code) goto _err;
if (pWriter->pSIter) {
*ppRowInfo = &pWriter->pSIter->rowInfo;
goto _exit;
}
_exit:
return code;
code = tsdbSnapWriteNextRow(pWriter, ppRowInfo);
TSDB_CHECK_CODE(code, lino, _exit);
_err:
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code));
}
return code;
}
static int32_t tsdbSnapWriteToDataFile(STsdbSnapWriter* pWriter, int32_t iRow, int8_t* done) {
static int32_t tsdbSnapWriteFileDataEnd(STsdbSnapWriter* pWriter) {
int32_t code = 0;
int32_t lino = 0;
SBlockData* pBData = &pWriter->bData;
TABLEID id = {.suid = pBData->suid, .uid = pBData->uid ? pBData->uid : pBData->aUid[iRow]};
TSDBROW row = tsdbRowFromBlockData(pBData, iRow);
TSDBKEY key = TSDBROW_KEY(&row);
ASSERT(pWriter->pDataFWriter);
*done = 0;
while (pWriter->dReader.iRow < pWriter->dReader.bData.nRow ||
pWriter->dReader.iDataBlk < pWriter->dReader.mDataBlk.nItem) {
// Merge row by row
for (; pWriter->dReader.iRow < pWriter->dReader.bData.nRow; pWriter->dReader.iRow++) {
TSDBROW trow = tsdbRowFromBlockData(&pWriter->dReader.bData, pWriter->dReader.iRow);
TSDBKEY tKey = TSDBROW_KEY(&trow);
// consume remain data and end with a NULL table row
SRowInfo* pRowInfo;
code = tsdbSnapWriteGetRow(pWriter, &pRowInfo);
TSDB_CHECK_CODE(code, lino, _exit);
for (;;) {
code = tsdbSnapWriteTableData(pWriter, pRowInfo);
TSDB_CHECK_CODE(code, lino, _exit);
ASSERT(pWriter->dReader.bData.suid == id.suid && pWriter->dReader.bData.uid == id.uid);
if (pRowInfo == NULL) break;
int32_t c = tsdbKeyCmprFn(&key, &tKey);
if (c < 0) {
code = tBlockDataAppendRow(&pWriter->dWriter.bData, &row, NULL, id.uid);
if (code) goto _err;
} else if (c > 0) {
code = tBlockDataAppendRow(&pWriter->dWriter.bData, &trow, NULL, id.uid);
if (code) goto _err;
} else {
ASSERT(0);
}
code = tsdbSnapWriteNextRow(pWriter, &pRowInfo);
TSDB_CHECK_CODE(code, lino, _exit);
}
if (pWriter->dWriter.bData.nRow >= pWriter->maxRow) {
code = tsdbWriteDataBlock(pWriter->dWriter.pWriter, &pWriter->dWriter.bData, &pWriter->dWriter.mDataBlk,
pWriter->cmprAlg);
if (code) goto _err;
}
// do file-level updates
code = tsdbWriteSttBlk(pWriter->pDataFWriter, pWriter->aSttBlk);
TSDB_CHECK_CODE(code, lino, _exit);
if (c < 0) {
*done = 1;
goto _exit;
}
}
code = tsdbWriteBlockIdx(pWriter->pDataFWriter, pWriter->aBlockIdx);
TSDB_CHECK_CODE(code, lino, _exit);
// Merge row by block
SDataBlk tDataBlk = {.minKey = key, .maxKey = key};
for (; pWriter->dReader.iDataBlk < pWriter->dReader.mDataBlk.nItem; pWriter->dReader.iDataBlk++) {
SDataBlk dataBlk;
tMapDataGetItemByIdx(&pWriter->dReader.mDataBlk, pWriter->dReader.iDataBlk, &dataBlk, tGetDataBlk);
code = tsdbUpdateDFileSetHeader(pWriter->pDataFWriter);
TSDB_CHECK_CODE(code, lino, _exit);
int32_t c = tDataBlkCmprFn(&dataBlk, &tDataBlk);
if (c < 0) {
code = tsdbWriteDataBlock(pWriter->dWriter.pWriter, &pWriter->dWriter.bData, &pWriter->dWriter.mDataBlk,
pWriter->cmprAlg);
if (code) goto _err;
code = tMapDataPutItem(&pWriter->dWriter.mDataBlk, &dataBlk, tPutDataBlk);
if (code) goto _err;
} else if (c > 0) {
code = tBlockDataAppendRow(&pWriter->dWriter.bData, &row, NULL, id.uid);
if (code) goto _err;
if (pWriter->dWriter.bData.nRow >= pWriter->maxRow) {
code = tsdbWriteDataBlock(pWriter->dWriter.pWriter, &pWriter->dWriter.bData, &pWriter->dWriter.mDataBlk,
pWriter->cmprAlg);
if (code) goto _err;
}
code = tsdbFSUpsertFSet(&pWriter->fs, &pWriter->pDataFWriter->wSet);
TSDB_CHECK_CODE(code, lino, _exit);
*done = 1;
goto _exit;
} else {
code = tsdbReadDataBlockEx(pWriter->dReader.pReader, &dataBlk, &pWriter->dReader.bData);
if (code) goto _err;
pWriter->dReader.iRow = 0;
code = tsdbDataFWriterClose(&pWriter->pDataFWriter, 1);
TSDB_CHECK_CODE(code, lino, _exit);
pWriter->dReader.iDataBlk++;
break;
}
}
if (pWriter->pDataFReader) {
code = tsdbDataFReaderClose(&pWriter->pDataFReader);
TSDB_CHECK_CODE(code, lino, _exit);
}
_exit:
return code;
// clear sources
while (pWriter->iterList) {
STsdbDataIter2* pIter = pWriter->iterList;
pWriter->iterList = pIter->next;
tsdbCloseDataIter2(pIter);
}
_err:
tsdbError("vgId:%d, %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code));
_exit:
if (code) {
tsdbError("vgId:%d %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code));
} else {
tsdbDebug("vgId:%d %s is done", TD_VID(pWriter->pTsdb->pVnode), __func__);
}
return code;
}
static int32_t tsdbSnapWriteToSttFile(STsdbSnapWriter* pWriter, int32_t iRow) {
static int32_t tsdbSnapWriteTimeSeriesData(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr) {
int32_t code = 0;
int32_t lino = 0;
TABLEID id = {.suid = pWriter->bData.suid,
.uid = pWriter->bData.uid ? pWriter->bData.uid : pWriter->bData.aUid[iRow]};
TSDBROW row = tsdbRowFromBlockData(&pWriter->bData, iRow);
SBlockData* pBData = &pWriter->dWriter.sData;
code = tDecmprBlockData(pHdr->data, pHdr->size, &pWriter->inData, pWriter->aBuf);
TSDB_CHECK_CODE(code, lino, _exit);
if (pBData->suid || pBData->uid) {
if (!TABLE_SAME_SCHEMA(pBData->suid, pBData->uid, id.suid, id.uid)) {
code = tsdbWriteSttBlock(pWriter->dWriter.pWriter, pBData, pWriter->dWriter.aSttBlk, pWriter->cmprAlg);
if (code) goto _err;
ASSERT(pWriter->inData.nRow > 0);
pBData->suid = 0;
pBData->uid = 0;
// switch to new data file if need
int32_t fid = tsdbKeyFid(pWriter->inData.aTSKEY[0], pWriter->minutes, pWriter->precision);
if (pWriter->fid != fid) {
if (pWriter->pDataFWriter) {
code = tsdbSnapWriteFileDataEnd(pWriter);
TSDB_CHECK_CODE(code, lino, _exit);
}
}
if (pBData->suid == 0 && pBData->uid == 0) {
code = tsdbUpdateTableSchema(pWriter->pTsdb->pVnode->pMeta, pWriter->id.suid, pWriter->id.uid, &pWriter->skmTable);
if (code) goto _err;
TABLEID tid = {.suid = pWriter->id.suid, .uid = pWriter->id.suid ? 0 : pWriter->id.uid};
code = tBlockDataInit(pBData, &tid, pWriter->skmTable.pTSchema, NULL, 0);
if (code) goto _err;
code = tsdbSnapWriteFileDataStart(pWriter, fid);
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tBlockDataAppendRow(pBData, &row, NULL, id.uid);
if (code) goto _err;
// loop write each row
SRowInfo* pRowInfo;
code = tsdbSnapWriteGetRow(pWriter, &pRowInfo);
TSDB_CHECK_CODE(code, lino, _exit);
for (int32_t iRow = 0; iRow < pWriter->inData.nRow; ++iRow) {
SRowInfo rInfo = {.suid = pWriter->inData.suid,
.uid = pWriter->inData.uid ? pWriter->inData.uid : pWriter->inData.aUid[iRow],
.row = tsdbRowFromBlockData(&pWriter->inData, iRow)};
if (pBData->nRow >= pWriter->maxRow) {
code = tsdbWriteSttBlock(pWriter->dWriter.pWriter, pBData, pWriter->dWriter.aSttBlk, pWriter->cmprAlg);
if (code) goto _err;
for (;;) {
if (pRowInfo == NULL) {
code = tsdbSnapWriteTableData(pWriter, &rInfo);
TSDB_CHECK_CODE(code, lino, _exit);
break;
} else {
int32_t c = tRowInfoCmprFn(&rInfo, pRowInfo);
if (c < 0) {
code = tsdbSnapWriteTableData(pWriter, &rInfo);
TSDB_CHECK_CODE(code, lino, _exit);
break;
} else if (c > 0) {
code = tsdbSnapWriteTableData(pWriter, pRowInfo);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbSnapWriteNextRow(pWriter, &pRowInfo);
TSDB_CHECK_CODE(code, lino, _exit);
} else {
ASSERT(0);
}
}
}
}
_exit:
return code;
_err:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code));
} else {
tsdbDebug("vgId:%d %s done, suid:%" PRId64 " uid:%" PRId64 " nRow:%d", TD_VID(pWriter->pTsdb->pVnode), __func__,
pWriter->inData.suid, pWriter->inData.uid, pWriter->inData.nRow);
}
return code;
}
static int32_t tsdbSnapWriteRowData(STsdbSnapWriter* pWriter, int32_t iRow) {
// SNAP_DATA_DEL
static int32_t tsdbSnapWriteDelTableDataStart(STsdbSnapWriter* pWriter, TABLEID* pId) {
int32_t code = 0;
int32_t lino = 0;
SBlockData* pBlockData = &pWriter->bData;
TABLEID id = {.suid = pBlockData->suid, .uid = pBlockData->uid ? pBlockData->uid : pBlockData->aUid[iRow]};
// End last table data write if need
if (tTABLEIDCmprFn(&pWriter->id, &id) != 0) {
code = tsdbSnapWriteTableDataEnd(pWriter);
if (code) goto _err;
}
// Start new table data write if need
if (pWriter->id.suid == 0 && pWriter->id.uid == 0) {
code = tsdbSnapWriteTableDataStart(pWriter, &id);
if (code) goto _err;
}
// Merge with .data file data
int8_t done = 0;
if (pWriter->dReader.pBlockIdx && tTABLEIDCmprFn(pWriter->dReader.pBlockIdx, &id) == 0) {
code = tsdbSnapWriteToDataFile(pWriter, iRow, &done);
if (code) goto _err;
}
// Append to the .stt data block (todo: check if need to set/reload sst block)
if (!done) {
code = tsdbSnapWriteToSttFile(pWriter, iRow);
if (code) goto _err;
if (pId) {
pWriter->tbid = *pId;
} else {
pWriter->tbid = (TABLEID){.suid = INT64_MAX, .uid = INT64_MAX};
}
_exit:
return code;
taosArrayClear(pWriter->aDelData);
_err:
tsdbError("vgId:%d, %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code));
return code;
}
if (pWriter->pTIter) {
while (pWriter->pTIter->tIter.iDelIdx < taosArrayGetSize(pWriter->pTIter->tIter.aDelIdx)) {
SDelIdx* pDelIdx = taosArrayGet(pWriter->pTIter->tIter.aDelIdx, pWriter->pTIter->tIter.iDelIdx);
static int32_t tsdbSnapWriteData(STsdbSnapWriter* pWriter, uint8_t* pData, uint32_t nData) {
int32_t code = 0;
STsdb* pTsdb = pWriter->pTsdb;
SBlockData* pBlockData = &pWriter->bData;
int32_t c = tTABLEIDCmprFn(pDelIdx, &pWriter->tbid);
if (c < 0) {
code = tsdbReadDelData(pWriter->pDelFReader, pDelIdx, pWriter->pTIter->tIter.aDelData);
TSDB_CHECK_CODE(code, lino, _exit);
// Decode data
SSnapDataHdr* pHdr = (SSnapDataHdr*)pData;
code = tDecmprBlockData(pHdr->data, pHdr->size, pBlockData, pWriter->aBuf);
if (code) goto _err;
SDelIdx* pDelIdxNew = taosArrayReserve(pWriter->aDelIdx, 1);
if (pDelIdxNew == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
ASSERT(pBlockData->nRow > 0);
pDelIdxNew->suid = pDelIdx->suid;
pDelIdxNew->uid = pDelIdx->uid;
// Loop to handle each row
for (int32_t iRow = 0; iRow < pBlockData->nRow; iRow++) {
TSKEY ts = pBlockData->aTSKEY[iRow];
int32_t fid = tsdbKeyFid(ts, pWriter->minutes, pWriter->precision);
code = tsdbWriteDelData(pWriter->pDelFWriter, pWriter->pTIter->tIter.aDelData, pDelIdxNew);
TSDB_CHECK_CODE(code, lino, _exit);
if (pWriter->dWriter.pWriter == NULL || pWriter->fid != fid) {
if (pWriter->dWriter.pWriter) {
// ASSERT(fid > pWriter->fid);
pWriter->pTIter->tIter.iDelIdx++;
} else if (c == 0) {
code = tsdbReadDelData(pWriter->pDelFReader, pDelIdx, pWriter->aDelData);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbSnapWriteCloseFile(pWriter);
if (code) goto _err;
pWriter->pTIter->tIter.iDelIdx++;
break;
} else {
break;
}
code = tsdbSnapWriteOpenFile(pWriter, fid);
if (code) goto _err;
}
code = tsdbSnapWriteRowData(pWriter, iRow);
if (code) goto _err;
}
return code;
_err:
tsdbError("vgId:%d, vnode snapshot tsdb write data for %s failed since %s", TD_VID(pTsdb->pVnode), pTsdb->path,
tstrerror(code));
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code));
} else {
tsdbTrace("vgId:%d %s done, suid:%" PRId64 " uid:%" PRId64, TD_VID(pWriter->pTsdb->pVnode), __func__,
pWriter->tbid.suid, pWriter->tbid.uid);
}
return code;
}
// SNAP_DATA_DEL
static int32_t tsdbSnapMoveWriteDelData(STsdbSnapWriter* pWriter, TABLEID* pId) {
static int32_t tsdbSnapWriteDelTableDataEnd(STsdbSnapWriter* pWriter) {
int32_t code = 0;
int32_t lino = 0;
while (true) {
if (pWriter->iDelIdx >= taosArrayGetSize(pWriter->aDelIdxR)) break;
SDelIdx* pDelIdx = (SDelIdx*)taosArrayGet(pWriter->aDelIdxR, pWriter->iDelIdx);
if (tTABLEIDCmprFn(pDelIdx, pId) >= 0) break;
code = tsdbReadDelData(pWriter->pDelFReader, pDelIdx, pWriter->aDelData);
if (code) goto _exit;
SDelIdx delIdx = *pDelIdx;
code = tsdbWriteDelData(pWriter->pDelFWriter, pWriter->aDelData, &delIdx);
if (code) goto _exit;
if (taosArrayPush(pWriter->aDelIdxW, &delIdx) == NULL) {
if (taosArrayGetSize(pWriter->aDelData) > 0) {
SDelIdx* pDelIdx = taosArrayReserve(pWriter->aDelIdx, 1);
if (pDelIdx == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
TSDB_CHECK_CODE(code, lino, _exit);
}
pWriter->iDelIdx++;
pDelIdx->suid = pWriter->tbid.suid;
pDelIdx->uid = pWriter->tbid.uid;
code = tsdbWriteDelData(pWriter->pDelFWriter, pWriter->aDelData, pDelIdx);
TSDB_CHECK_CODE(code, lino, _exit);
}
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code));
} else {
tsdbTrace("vgId:%d %s done", TD_VID(pWriter->pTsdb->pVnode), __func__);
}
return code;
}
static int32_t tsdbSnapWriteDel(STsdbSnapWriter* pWriter, uint8_t* pData, uint32_t nData) {
static int32_t tsdbSnapWriteDelTableData(STsdbSnapWriter* pWriter, TABLEID* pId, uint8_t* pData, int64_t size) {
int32_t code = 0;
STsdb* pTsdb = pWriter->pTsdb;
// Open del file if not opened yet
if (pWriter->pDelFWriter == NULL) {
SDelFile* pDelFile = pWriter->fs.pDelFile;
// reader
if (pDelFile) {
code = tsdbDelFReaderOpen(&pWriter->pDelFReader, pDelFile, pTsdb);
if (code) goto _err;
int32_t lino = 0;
code = tsdbReadDelIdx(pWriter->pDelFReader, pWriter->aDelIdxR);
if (code) goto _err;
} else {
taosArrayClear(pWriter->aDelIdxR);
if (pId == NULL || pId->uid != pWriter->tbid.uid) {
if (pWriter->tbid.uid) {
code = tsdbSnapWriteDelTableDataEnd(pWriter);
TSDB_CHECK_CODE(code, lino, _exit);
}
pWriter->iDelIdx = 0;
// writer
SDelFile delFile = {.commitID = pWriter->commitID};
code = tsdbDelFWriterOpen(&pWriter->pDelFWriter, &delFile, pTsdb);
if (code) goto _err;
taosArrayClear(pWriter->aDelIdxW);
code = tsdbSnapWriteDelTableDataStart(pWriter, pId);
TSDB_CHECK_CODE(code, lino, _exit);
}
SSnapDataHdr* pHdr = (SSnapDataHdr*)pData;
TABLEID id = *(TABLEID*)pHdr->data;
if (pId == NULL) goto _exit;
ASSERT(pHdr->size + sizeof(SSnapDataHdr) == nData);
int64_t n = 0;
while (n < size) {
SDelData delData;
n += tGetDelData(pData + n, &delData);
// Move write data < id
code = tsdbSnapMoveWriteDelData(pWriter, &id);
if (code) goto _err;
if (taosArrayPush(pWriter->aDelData, &delData) < 0) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
}
ASSERT(n == size);
// Merge incoming data with current
if (pWriter->iDelIdx < taosArrayGetSize(pWriter->aDelIdxR) &&
tTABLEIDCmprFn(taosArrayGet(pWriter->aDelIdxR, pWriter->iDelIdx), &id) == 0) {
SDelIdx* pDelIdx = (SDelIdx*)taosArrayGet(pWriter->aDelIdxR, pWriter->iDelIdx);
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code));
}
return code;
}
code = tsdbReadDelData(pWriter->pDelFReader, pDelIdx, pWriter->aDelData);
if (code) goto _err;
static int32_t tsdbSnapWriteDelDataStart(STsdbSnapWriter* pWriter) {
int32_t code = 0;
int32_t lino = 0;
pWriter->iDelIdx++;
} else {
taosArrayClear(pWriter->aDelData);
}
STsdb* pTsdb = pWriter->pTsdb;
SDelFile* pDelFile = pWriter->fs.pDelFile;
int64_t n = sizeof(SSnapDataHdr) + sizeof(TABLEID);
while (n < nData) {
SDelData delData;
pWriter->tbid = (TABLEID){0};
n += tGetDelData(pData + n, &delData);
// reader
if (pDelFile) {
code = tsdbDelFReaderOpen(&pWriter->pDelFReader, pDelFile, pTsdb);
TSDB_CHECK_CODE(code, lino, _exit);
if (taosArrayPush(pWriter->aDelData, &delData) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
}
code = tsdbOpenTombFileDataIter(pWriter->pDelFReader, &pWriter->pTIter);
TSDB_CHECK_CODE(code, lino, _exit);
}
SDelIdx delIdx = {.suid = id.suid, .uid = id.uid};
code = tsdbWriteDelData(pWriter->pDelFWriter, pWriter->aDelData, &delIdx);
if (code) goto _err;
// writer
code = tsdbDelFWriterOpen(&pWriter->pDelFWriter, &(SDelFile){.commitID = pWriter->commitID}, pTsdb);
TSDB_CHECK_CODE(code, lino, _exit);
if (taosArrayPush(pWriter->aDelIdxW, &delIdx) == NULL) {
if ((pWriter->aDelIdx = taosArrayInit(0, sizeof(SDelIdx))) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _err;
TSDB_CHECK_CODE(code, lino, _exit);
}
if ((pWriter->aDelData = taosArrayInit(0, sizeof(SDelData))) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
return code;
_err:
tsdbError("vgId:%d, vnode snapshot tsdb write del for %s failed since %s", TD_VID(pTsdb->pVnode), pTsdb->path,
tstrerror(code));
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code));
} else {
tsdbDebug("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__);
}
return code;
}
static int32_t tsdbSnapWriteDelEnd(STsdbSnapWriter* pWriter) {
static int32_t tsdbSnapWriteDelDataEnd(STsdbSnapWriter* pWriter) {
int32_t code = 0;
STsdb* pTsdb = pWriter->pTsdb;
int32_t lino = 0;
if (pWriter->pDelFWriter == NULL) return code;
STsdb* pTsdb = pWriter->pTsdb;
TABLEID id = {.suid = INT64_MAX, .uid = INT64_MAX};
code = tsdbSnapMoveWriteDelData(pWriter, &id);
if (code) goto _err;
// end remaining table with NULL data
code = tsdbSnapWriteDelTableData(pWriter, NULL, NULL, 0);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbWriteDelIdx(pWriter->pDelFWriter, pWriter->aDelIdxW);
if (code) goto _err;
// update file-level info
code = tsdbWriteDelIdx(pWriter->pDelFWriter, pWriter->aDelIdx);
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbUpdateDelFileHdr(pWriter->pDelFWriter);
if (code) goto _err;
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbFSUpsertDelFile(&pWriter->fs, &pWriter->pDelFWriter->fDel);
if (code) goto _err;
TSDB_CHECK_CODE(code, lino, _exit);
code = tsdbDelFWriterClose(&pWriter->pDelFWriter, 1);
if (code) goto _err;
TSDB_CHECK_CODE(code, lino, _exit);
if (pWriter->pDelFReader) {
code = tsdbDelFReaderClose(&pWriter->pDelFReader);
if (code) goto _err;
TSDB_CHECK_CODE(code, lino, _exit);
}
if (pWriter->pTIter) {
tsdbCloseDataIter2(pWriter->pTIter);
pWriter->pTIter = NULL;
}
tsdbInfo("vgId:%d, vnode snapshot tsdb write del for %s end", TD_VID(pTsdb->pVnode), pTsdb->path);
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code));
} else {
tsdbInfo("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__);
}
return code;
}
static int32_t tsdbSnapWriteDelData(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr) {
int32_t code = 0;
int32_t lino = 0;
STsdb* pTsdb = pWriter->pTsdb;
_err:
tsdbError("vgId:%d, vnode snapshot tsdb write del end for %s failed since %s", TD_VID(pTsdb->pVnode), pTsdb->path,
tstrerror(code));
// start to write del data if need
if (pWriter->pDelFWriter == NULL) {
code = tsdbSnapWriteDelDataStart(pWriter);
TSDB_CHECK_CODE(code, lino, _exit);
}
// do write del data
code = tsdbSnapWriteDelTableData(pWriter, (TABLEID*)pHdr->data, pHdr->data + sizeof(TABLEID),
pHdr->size - sizeof(TABLEID));
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
tsdbError("vgId:%d %s failed since %s", TD_VID(pTsdb->pVnode), __func__, tstrerror(code));
} else {
tsdbTrace("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__);
}
return code;
}
// APIs
int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWriter** ppWriter) {
int32_t code = 0;
int32_t lino = 0;
STsdbSnapWriter* pWriter = NULL;
int32_t code = 0;
int32_t lino = 0;
// alloc
pWriter = (STsdbSnapWriter*)taosMemoryCalloc(1, sizeof(*pWriter));
STsdbSnapWriter* pWriter = (STsdbSnapWriter*)taosMemoryCalloc(1, sizeof(*pWriter));
if (pWriter == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
......@@ -1288,11 +1833,6 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr
pWriter->pTsdb = pTsdb;
pWriter->sver = sver;
pWriter->ever = ever;
code = tsdbFSCopy(pTsdb, &pWriter->fs);
TSDB_CHECK_CODE(code, lino, _exit);
// config
pWriter->minutes = pTsdb->keepCfg.days;
pWriter->precision = pTsdb->keepCfg.precision;
pWriter->minRow = pTsdb->pVnode->config.tsdbCfg.minRows;
......@@ -1300,102 +1840,70 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr
pWriter->cmprAlg = pTsdb->pVnode->config.tsdbCfg.compression;
pWriter->commitID = pTsdb->pVnode->state.commitID;
code = tsdbFSCopy(pTsdb, &pWriter->fs);
TSDB_CHECK_CODE(code, lino, _exit);
// SNAP_DATA_TSDB
code = tBlockDataCreate(&pWriter->bData);
code = tBlockDataCreate(&pWriter->inData);
TSDB_CHECK_CODE(code, lino, _exit);
pWriter->fid = INT32_MIN;
pWriter->id = (TABLEID){0};
// Reader
pWriter->dReader.aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx));
if (pWriter->dReader.aBlockIdx == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tBlockDataCreate(&pWriter->dReader.bData);
TSDB_CHECK_CODE(code, lino, _exit);
// Writer
pWriter->dWriter.aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx));
if (pWriter->dWriter.aBlockIdx == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
pWriter->dWriter.aSttBlk = taosArrayInit(0, sizeof(SSttBlk));
if (pWriter->dWriter.aSttBlk == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tBlockDataCreate(&pWriter->dWriter.bData);
code = tBlockDataCreate(&pWriter->bData);
TSDB_CHECK_CODE(code, lino, _exit);
code = tBlockDataCreate(&pWriter->dWriter.sData);
code = tBlockDataCreate(&pWriter->sData);
TSDB_CHECK_CODE(code, lino, _exit);
// SNAP_DATA_DEL
pWriter->aDelIdxR = taosArrayInit(0, sizeof(SDelIdx));
if (pWriter->aDelIdxR == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
pWriter->aDelData = taosArrayInit(0, sizeof(SDelData));
if (pWriter->aDelData == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
pWriter->aDelIdxW = taosArrayInit(0, sizeof(SDelIdx));
if (pWriter->aDelIdxW == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
_exit:
if (code) {
tsdbError("vgId:%d, %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code));
*ppWriter = NULL;
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code));
if (pWriter) {
if (pWriter->aDelIdxW) taosArrayDestroy(pWriter->aDelIdxW);
if (pWriter->aDelData) taosArrayDestroy(pWriter->aDelData);
if (pWriter->aDelIdxR) taosArrayDestroy(pWriter->aDelIdxR);
tBlockDataDestroy(&pWriter->dWriter.sData, 1);
tBlockDataDestroy(&pWriter->dWriter.bData, 1);
if (pWriter->dWriter.aSttBlk) taosArrayDestroy(pWriter->dWriter.aSttBlk);
if (pWriter->dWriter.aBlockIdx) taosArrayDestroy(pWriter->dWriter.aBlockIdx);
tBlockDataDestroy(&pWriter->dReader.bData, 1);
if (pWriter->dReader.aBlockIdx) taosArrayDestroy(pWriter->dReader.aBlockIdx);
tBlockDataDestroy(&pWriter->sData, 1);
tBlockDataDestroy(&pWriter->bData, 1);
tBlockDataDestroy(&pWriter->inData, 1);
tsdbFSDestroy(&pWriter->fs);
taosMemoryFree(pWriter);
pWriter = NULL;
}
} else {
tsdbInfo("vgId:%d, %s done", TD_VID(pTsdb->pVnode), __func__);
*ppWriter = pWriter;
tsdbInfo("vgId:%d %s done, sver:%" PRId64 " ever:%" PRId64, TD_VID(pTsdb->pVnode), __func__, sver, ever);
}
*ppWriter = pWriter;
return code;
}
int32_t tsdbSnapWriterPrepareClose(STsdbSnapWriter* pWriter) {
int32_t code = 0;
if (pWriter->dWriter.pWriter) {
code = tsdbSnapWriteCloseFile(pWriter);
if (code) goto _exit;
int32_t lino = 0;
if (pWriter->pDataFWriter) {
code = tsdbSnapWriteFileDataEnd(pWriter);
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tsdbSnapWriteDelEnd(pWriter);
if (code) goto _exit;
if (pWriter->pDelFWriter) {
code = tsdbSnapWriteDelDataEnd(pWriter);
TSDB_CHECK_CODE(code, lino, _exit);
}
code = tsdbFSPrepareCommit(pWriter->pTsdb, &pWriter->fs);
if (code) goto _exit;
TSDB_CHECK_CODE(code, lino, _exit);
_exit:
if (code) {
tsdbError("vgId:%d, %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code));
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code));
} else {
tsdbDebug("vgId:%d %s done", TD_VID(pWriter->pTsdb->pVnode), __func__);
}
return code;
}
int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) {
int32_t code = 0;
int32_t code = 0;
int32_t lino = 0;
STsdbSnapWriter* pWriter = *ppWriter;
STsdb* pTsdb = pWriter->pTsdb;
......@@ -1408,7 +1916,7 @@ int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) {
code = tsdbFSCommit(pWriter->pTsdb);
if (code) {
taosThreadRwlockUnlock(&pTsdb->rwLock);
goto _err;
TSDB_CHECK_CODE(code, lino, _exit);
}
// unlock
......@@ -1416,72 +1924,60 @@ int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) {
}
// SNAP_DATA_DEL
taosArrayDestroy(pWriter->aDelIdxW);
taosArrayDestroy(pWriter->aDelData);
taosArrayDestroy(pWriter->aDelIdxR);
taosArrayDestroy(pWriter->aDelIdx);
// SNAP_DATA_TSDB
// Writer
tBlockDataDestroy(&pWriter->dWriter.sData, 1);
tBlockDataDestroy(&pWriter->dWriter.bData, 1);
taosArrayDestroy(pWriter->dWriter.aSttBlk);
tMapDataClear(&pWriter->dWriter.mDataBlk);
taosArrayDestroy(pWriter->dWriter.aBlockIdx);
// Reader
tBlockDataDestroy(&pWriter->dReader.bData, 1);
tMapDataClear(&pWriter->dReader.mDataBlk);
taosArrayDestroy(pWriter->dReader.aBlockIdx);
tBlockDataDestroy(&pWriter->sData, 1);
tBlockDataDestroy(&pWriter->bData, 1);
taosArrayDestroy(pWriter->aSttBlk);
tMapDataClear(&pWriter->mDataBlk);
taosArrayDestroy(pWriter->aBlockIdx);
tDestroyTSchema(pWriter->skmTable.pTSchema);
tBlockDataDestroy(&pWriter->inData, 1);
for (int32_t iBuf = 0; iBuf < sizeof(pWriter->aBuf) / sizeof(uint8_t*); iBuf++) {
tFree(pWriter->aBuf[iBuf]);
}
tsdbInfo("vgId:%d, %s done", TD_VID(pWriter->pTsdb->pVnode), __func__);
tsdbFSDestroy(&pWriter->fs);
taosMemoryFree(pWriter);
*ppWriter = NULL;
return code;
_err:
tsdbError("vgId:%d, vnode snapshot tsdb writer close for %s failed since %s", TD_VID(pWriter->pTsdb->pVnode),
pWriter->pTsdb->path, tstrerror(code));
taosMemoryFree(pWriter);
*ppWriter = NULL;
_exit:
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code));
} else {
tsdbInfo("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__);
}
return code;
}
int32_t tsdbSnapWrite(STsdbSnapWriter* pWriter, uint8_t* pData, uint32_t nData) {
int32_t code = 0;
SSnapDataHdr* pHdr = (SSnapDataHdr*)pData;
int32_t tsdbSnapWrite(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr) {
int32_t code = 0;
int32_t lino = 0;
// ts data
if (pHdr->type == SNAP_DATA_TSDB) {
code = tsdbSnapWriteData(pWriter, pData, nData);
if (code) goto _err;
code = tsdbSnapWriteTimeSeriesData(pWriter, pHdr);
TSDB_CHECK_CODE(code, lino, _exit);
goto _exit;
} else {
if (pWriter->dWriter.pWriter) {
code = tsdbSnapWriteCloseFile(pWriter);
if (code) goto _err;
}
} else if (pWriter->pDataFWriter) {
code = tsdbSnapWriteFileDataEnd(pWriter);
TSDB_CHECK_CODE(code, lino, _exit);
}
// del data
if (pHdr->type == SNAP_DATA_DEL) {
code = tsdbSnapWriteDel(pWriter, pData, nData);
if (code) goto _err;
code = tsdbSnapWriteDelData(pWriter, pHdr);
TSDB_CHECK_CODE(code, lino, _exit);
goto _exit;
}
_exit:
tsdbDebug("vgId:%d, tsdb snapshot write for %s succeed", TD_VID(pWriter->pTsdb->pVnode), pWriter->pTsdb->path);
return code;
_err:
tsdbError("vgId:%d, tsdb snapshot write for %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), pWriter->pTsdb->path,
tstrerror(code));
if (code) {
tsdbError("vgId:%d %s failed at line %d since %s, type:%d index:%" PRId64 " size:%" PRId64,
TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code), pHdr->type, pHdr->index, pHdr->size);
} else {
tsdbDebug("vgId:%d %s done, type:%d index:%" PRId64 " size:%" PRId64, TD_VID(pWriter->pTsdb->pVnode), __func__,
pHdr->type, pHdr->index, pHdr->size);
}
return code;
}
......@@ -684,7 +684,7 @@ int32_t tRowMergerInit2(SRowMerger *pMerger, STSchema *pResTSchema, TSDBROW *pRo
tsdbRowGetColVal(pRow, pTSchema, jCol++, pColVal);
if ((!COL_VAL_IS_NONE(pColVal)) && (!COL_VAL_IS_NULL(pColVal)) && IS_VAR_DATA_TYPE(pColVal->type)) {
uint8_t *pVal = pColVal->value.pData;
pColVal->value.pData = NULL;
code = tRealloc(&pColVal->value.pData, pColVal->value.nData);
if (code) goto _exit;
......@@ -757,7 +757,7 @@ int32_t tRowMergerAdd(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema) {
pTColVal->value.nData = pColVal->value.nData;
if (pTColVal->value.nData) {
memcpy(pTColVal->value.pData, pColVal->value.pData, pTColVal->value.nData);
memcpy(pTColVal->value.pData, pColVal->value.pData, pTColVal->value.nData);
}
pTColVal->flag = 0;
} else {
......@@ -776,7 +776,7 @@ int32_t tRowMergerAdd(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema) {
code = tRealloc(&tColVal->value.pData, pColVal->value.nData);
if (code) return code;
tColVal->value.nData = pColVal->value.nData;
tColVal->value.nData = pColVal->value.nData;
if (pColVal->value.nData) {
memcpy(tColVal->value.pData, pColVal->value.pData, pColVal->value.nData);
}
......@@ -825,7 +825,7 @@ int32_t tRowMergerInit(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema) {
tsdbRowGetColVal(pRow, pTSchema, iCol, pColVal);
if ((!COL_VAL_IS_NONE(pColVal)) && (!COL_VAL_IS_NULL(pColVal)) && IS_VAR_DATA_TYPE(pColVal->type)) {
uint8_t *pVal = pColVal->value.pData;
pColVal->value.pData = NULL;
code = tRealloc(&pColVal->value.pData, pColVal->value.nData);
if (code) goto _exit;
......@@ -834,7 +834,7 @@ int32_t tRowMergerInit(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema) {
memcpy(pColVal->value.pData, pVal, pColVal->value.nData);
}
}
if (taosArrayPush(pMerger->pArray, pColVal) == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _exit;
......@@ -845,7 +845,7 @@ _exit:
return code;
}
void tRowMergerClear(SRowMerger *pMerger) {
void tRowMergerClear(SRowMerger *pMerger) {
for (int32_t iCol = 1; iCol < pMerger->pTSchema->numOfCols; iCol++) {
SColVal *pTColVal = taosArrayGet(pMerger->pArray, iCol);
if (IS_VAR_DATA_TYPE(pTColVal->type)) {
......@@ -853,7 +853,7 @@ void tRowMergerClear(SRowMerger *pMerger) {
}
}
taosArrayDestroy(pMerger->pArray);
taosArrayDestroy(pMerger->pArray);
}
int32_t tRowMerge(SRowMerger *pMerger, TSDBROW *pRow) {
......@@ -876,7 +876,7 @@ int32_t tRowMerge(SRowMerger *pMerger, TSDBROW *pRow) {
pTColVal->value.nData = pColVal->value.nData;
if (pTColVal->value.nData) {
memcpy(pTColVal->value.pData, pColVal->value.pData, pTColVal->value.nData);
memcpy(pTColVal->value.pData, pColVal->value.pData, pTColVal->value.nData);
}
pTColVal->flag = 0;
} else {
......@@ -898,7 +898,7 @@ int32_t tRowMerge(SRowMerger *pMerger, TSDBROW *pRow) {
tColVal->value.nData = pColVal->value.nData;
if (tColVal->value.nData) {
memcpy(tColVal->value.pData, pColVal->value.pData, tColVal->value.nData);
memcpy(tColVal->value.pData, pColVal->value.pData, tColVal->value.nData);
}
tColVal->flag = 0;
} else {
......
......@@ -455,7 +455,7 @@ int32_t vnodeSnapWrite(SVSnapWriter *pWriter, uint8_t *pData, uint32_t nData) {
if (code) goto _err;
}
code = tsdbSnapWrite(pWriter->pTsdbSnapWriter, pData, nData);
code = tsdbSnapWrite(pWriter->pTsdbSnapWriter, pHdr);
if (code) goto _err;
} break;
case SNAP_DATA_TQ_HANDLE: {
......
......@@ -2477,7 +2477,19 @@ static void doStreamIntervalAggImpl(SOperatorInfo* pOperatorInfo, SSDataBlock* p
pInfo->delKey = key;
}
int32_t prevEndPos = (forwardRows - 1) * step + startPos;
ASSERT(pSDataBlock->info.window.skey > 0 && pSDataBlock->info.window.ekey > 0);
if (pSDataBlock->info.window.skey <= 0 || pSDataBlock->info.window.ekey <= 0) {
qError("table uid %" PRIu64 " data block timestamp range may not be calculated! minKey %" PRId64
",maxKey %" PRId64,
pSDataBlock->info.id.uid, pSDataBlock->info.window.skey, pSDataBlock->info.window.ekey);
blockDataUpdateTsWindow(pSDataBlock, 0);
// timestamp of the data is incorrect
if (pSDataBlock->info.window.skey <= 0 || pSDataBlock->info.window.ekey <= 0) {
qError("table uid %" PRIu64 " data block timestamp is out of range! minKey %" PRId64 ",maxKey %" PRId64,
pSDataBlock->info.id.uid, pSDataBlock->info.window.skey, pSDataBlock->info.window.ekey);
}
}
if (IS_FINAL_OP(pInfo)) {
startPos = getNextQualifiedFinalWindow(&pInfo->interval, &nextWin, &pSDataBlock->info, tsCols, prevEndPos);
} else {
......
......@@ -194,6 +194,8 @@ typedef struct SQWorker {
SMsgCb msgCb;
SQWStat stat;
int32_t *destroyed;
int8_t nodeStopped;
} SQWorker;
typedef struct SQWorkerMgmt {
......
......@@ -213,9 +213,15 @@ int32_t qwAcquireTaskCtx(QW_FPARAMS_DEF, SQWTaskCtx **ctx) {
QW_SET_QTID(id, qId, tId, eId);
*ctx = taosHashAcquire(mgmt->ctxHash, id, sizeof(id));
int8_t nodeStopped = atomic_load_8(&mgmt->nodeStopped);
if (NULL == (*ctx)) {
QW_TASK_DLOG_E("task ctx not exist, may be dropped");
QW_ERR_RET(TSDB_CODE_QRY_TASK_CTX_NOT_EXIST);
if (!nodeStopped) {
QW_TASK_DLOG_E("task ctx not exist, may be dropped");
QW_ERR_RET(TSDB_CODE_QRY_TASK_CTX_NOT_EXIST);
} else {
QW_TASK_DLOG_E("node stopped");
QW_ERR_RET(TSDB_CODE_VND_STOPPED);
}
}
return TSDB_CODE_SUCCESS;
......@@ -226,9 +232,16 @@ int32_t qwGetTaskCtx(QW_FPARAMS_DEF, SQWTaskCtx **ctx) {
QW_SET_QTID(id, qId, tId, eId);
*ctx = taosHashGet(mgmt->ctxHash, id, sizeof(id));
int8_t nodeStopped = atomic_load_8(&mgmt->nodeStopped);
if (NULL == (*ctx)) {
QW_TASK_DLOG_E("task ctx not exist, may be dropped");
QW_ERR_RET(TSDB_CODE_QRY_TASK_CTX_NOT_EXIST);
if (!nodeStopped) {
QW_TASK_DLOG_E("task ctx not exist, may be dropped");
QW_ERR_RET(TSDB_CODE_QRY_TASK_CTX_NOT_EXIST);
} else {
QW_TASK_DLOG_E("node stopped");
QW_ERR_RET(TSDB_CODE_VND_STOPPED);
}
}
return TSDB_CODE_SUCCESS;
......
......@@ -1188,6 +1188,9 @@ void qWorkerStopAllTasks(void *qWorkerMgmt) {
uint64_t qId, tId, sId;
int32_t eId;
int64_t rId = 0;
atomic_store_8(&mgmt->nodeStopped, 1);
void *pIter = taosHashIterate(mgmt->ctxHash, NULL);
while (pIter) {
SQWTaskCtx *ctx = (SQWTaskCtx *)pIter;
......
......@@ -207,6 +207,7 @@ void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) {
if (ppTask) {
SStreamTask* pTask = *ppTask;
taosHashRemove(pMeta->pTasks, &taskId, sizeof(int32_t));
tdbTbDelete(pMeta->pTaskDb, &taskId, sizeof(int32_t), pMeta->txn);
/*if (pTask->timer) {
* taosTmrStop(pTask->timer);*/
/*pTask->timer = NULL;*/
......
......@@ -89,45 +89,6 @@
// /\ UNCHANGED <<candidateVars, leaderVars>>
//
int32_t syncNodeFollowerCommit(SSyncNode* ths, SyncIndex newCommitIndex) {
ASSERT(false && "deprecated");
if (ths->state != TAOS_SYNC_STATE_FOLLOWER) {
sNTrace(ths, "can not do follower commit");
return -1;
}
// maybe update commit index, leader notice me
if (newCommitIndex > ths->commitIndex) {
// has commit entry in local
if (newCommitIndex <= ths->pLogStore->syncLogLastIndex(ths->pLogStore)) {
// advance commit index to sanpshot first
SSnapshot snapshot;
ths->pFsm->FpGetSnapshotInfo(ths->pFsm, &snapshot);
if (snapshot.lastApplyIndex >= 0 && snapshot.lastApplyIndex > ths->commitIndex) {
SyncIndex commitBegin = ths->commitIndex;
SyncIndex commitEnd = snapshot.lastApplyIndex;
ths->commitIndex = snapshot.lastApplyIndex;
sNTrace(ths, "commit by snapshot from index:%" PRId64 " to index:%" PRId64, commitBegin, commitEnd);
}
SyncIndex beginIndex = ths->commitIndex + 1;
SyncIndex endIndex = newCommitIndex;
// update commit index
ths->commitIndex = newCommitIndex;
// call back Wal
int32_t code = ths->pLogStore->syncLogUpdateCommitIndex(ths->pLogStore, ths->commitIndex);
ASSERT(code == 0);
code = syncNodeDoCommit(ths, beginIndex, endIndex, ths->state);
ASSERT(code == 0);
}
}
return 0;
}
SSyncRaftEntry* syncBuildRaftEntryFromAppendEntries(const SyncAppendEntries* pMsg) {
SSyncRaftEntry* pEntry = taosMemoryMalloc(pMsg->dataLen);
if (pEntry == NULL) {
......@@ -232,256 +193,3 @@ _IGNORE:
rpcFreeCont(rpcRsp.pCont);
return 0;
}
int32_t syncNodeOnAppendEntriesOld(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
SyncAppendEntries* pMsg = pRpcMsg->pCont;
SRpcMsg rpcRsp = {0};
// if already drop replica, do not process
if (!syncNodeInRaftGroup(ths, &(pMsg->srcId))) {
syncLogRecvAppendEntries(ths, pMsg, "not in my config");
goto _IGNORE;
}
// prepare response msg
int32_t code = syncBuildAppendEntriesReply(&rpcRsp, ths->vgId);
if (code != 0) {
syncLogRecvAppendEntries(ths, pMsg, "build rsp error");
goto _IGNORE;
}
SyncAppendEntriesReply* pReply = rpcRsp.pCont;
pReply->srcId = ths->myRaftId;
pReply->destId = pMsg->srcId;
pReply->term = ths->raftStore.currentTerm;
pReply->success = false;
// pReply->matchIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore);
pReply->matchIndex = SYNC_INDEX_INVALID;
pReply->lastSendIndex = pMsg->prevLogIndex + 1;
pReply->startTime = ths->startTime;
if (pMsg->term < ths->raftStore.currentTerm) {
syncLogRecvAppendEntries(ths, pMsg, "reject, small term");
goto _SEND_RESPONSE;
}
if (pMsg->term > ths->raftStore.currentTerm) {
pReply->term = pMsg->term;
}
syncNodeStepDown(ths, pMsg->term);
syncNodeResetElectTimer(ths);
SyncIndex startIndex = ths->pLogStore->syncLogBeginIndex(ths->pLogStore);
SyncIndex lastIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore);
if (pMsg->prevLogIndex > lastIndex) {
syncLogRecvAppendEntries(ths, pMsg, "reject, index not match");
goto _SEND_RESPONSE;
}
if (pMsg->prevLogIndex >= startIndex) {
SyncTerm myPreLogTerm = syncNodeGetPreTerm(ths, pMsg->prevLogIndex + 1);
// ASSERT(myPreLogTerm != SYNC_TERM_INVALID);
if (myPreLogTerm == SYNC_TERM_INVALID) {
syncLogRecvAppendEntries(ths, pMsg, "reject, pre-term invalid");
goto _SEND_RESPONSE;
}
if (myPreLogTerm != pMsg->prevLogTerm) {
syncLogRecvAppendEntries(ths, pMsg, "reject, pre-term not match");
goto _SEND_RESPONSE;
}
}
// accept
pReply->success = true;
bool hasAppendEntries = pMsg->dataLen > 0;
if (hasAppendEntries) {
SSyncRaftEntry* pAppendEntry = syncEntryBuildFromAppendEntries(pMsg);
ASSERT(pAppendEntry != NULL);
SyncIndex appendIndex = pMsg->prevLogIndex + 1;
LRUHandle* hLocal = NULL;
LRUHandle* hAppend = NULL;
int32_t code = 0;
SSyncRaftEntry* pLocalEntry = NULL;
SLRUCache* pCache = ths->pLogStore->pCache;
hLocal = taosLRUCacheLookup(pCache, &appendIndex, sizeof(appendIndex));
if (hLocal) {
pLocalEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, hLocal);
code = 0;
ths->pLogStore->cacheHit++;
sNTrace(ths, "hit cache index:%" PRId64 ", bytes:%u, %p", appendIndex, pLocalEntry->bytes, pLocalEntry);
} else {
ths->pLogStore->cacheMiss++;
sNTrace(ths, "miss cache index:%" PRId64, appendIndex);
code = ths->pLogStore->syncLogGetEntry(ths->pLogStore, appendIndex, &pLocalEntry);
}
if (code == 0) {
// get local entry success
if (pLocalEntry->term == pAppendEntry->term) {
// do nothing
sNTrace(ths, "log match, do nothing, index:%" PRId64, appendIndex);
} else {
// truncate
code = ths->pLogStore->syncLogTruncate(ths->pLogStore, appendIndex);
if (code != 0) {
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "ignore, truncate error, append-index:%" PRId64, appendIndex);
syncLogRecvAppendEntries(ths, pMsg, logBuf);
if (hLocal) {
taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false);
} else {
syncEntryDestroy(pLocalEntry);
}
if (hAppend) {
taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false);
} else {
syncEntryDestroy(pAppendEntry);
}
goto _IGNORE;
}
ASSERT(pAppendEntry->index == appendIndex);
// append
code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry, false);
if (code != 0) {
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "ignore, append error, append-index:%" PRId64, appendIndex);
syncLogRecvAppendEntries(ths, pMsg, logBuf);
if (hLocal) {
taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false);
} else {
syncEntryDestroy(pLocalEntry);
}
if (hAppend) {
taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false);
} else {
syncEntryDestroy(pAppendEntry);
}
goto _IGNORE;
}
syncCacheEntry(ths->pLogStore, pAppendEntry, &hAppend);
}
} else {
if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) {
// log not exist
// truncate
code = ths->pLogStore->syncLogTruncate(ths->pLogStore, appendIndex);
if (code != 0) {
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "ignore, log not exist, truncate error, append-index:%" PRId64, appendIndex);
syncLogRecvAppendEntries(ths, pMsg, logBuf);
syncEntryDestroy(pLocalEntry);
syncEntryDestroy(pAppendEntry);
goto _IGNORE;
}
// append
code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry, false);
if (code != 0) {
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "ignore, log not exist, append error, append-index:%" PRId64, appendIndex);
syncLogRecvAppendEntries(ths, pMsg, logBuf);
if (hLocal) {
taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false);
} else {
syncEntryDestroy(pLocalEntry);
}
if (hAppend) {
taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false);
} else {
syncEntryDestroy(pAppendEntry);
}
goto _IGNORE;
}
syncCacheEntry(ths->pLogStore, pAppendEntry, &hAppend);
} else {
// get local entry success
char logBuf[128];
snprintf(logBuf, sizeof(logBuf), "ignore, get local entry error, append-index:%" PRId64 " err:%d", appendIndex,
terrno);
syncLogRecvAppendEntries(ths, pMsg, logBuf);
if (hLocal) {
taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false);
} else {
syncEntryDestroy(pLocalEntry);
}
if (hAppend) {
taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false);
} else {
syncEntryDestroy(pAppendEntry);
}
goto _IGNORE;
}
}
// update match index
pReply->matchIndex = pAppendEntry->index;
if (hLocal) {
taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false);
} else {
syncEntryDestroy(pLocalEntry);
}
if (hAppend) {
taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false);
} else {
syncEntryDestroy(pAppendEntry);
}
} else {
// no append entries, do nothing
// maybe has extra entries, no harm
// update match index
pReply->matchIndex = pMsg->prevLogIndex;
}
// maybe update commit index, leader notice me
syncNodeFollowerCommit(ths, pMsg->commitIndex);
syncLogRecvAppendEntries(ths, pMsg, "accept");
goto _SEND_RESPONSE;
_IGNORE:
rpcFreeCont(rpcRsp.pCont);
return 0;
_SEND_RESPONSE:
// msg event log
syncLogSendAppendEntriesReply(ths, pReply, "");
// send response
syncNodeSendMsgById(&pReply->destId, ths, &rpcRsp);
return 0;
}
......@@ -89,63 +89,3 @@ int32_t syncNodeOnAppendEntriesReply(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
}
return 0;
}
int32_t syncNodeOnAppendEntriesReplyOld(SSyncNode* ths, SyncAppendEntriesReply* pMsg) {
int32_t ret = 0;
// if already drop replica, do not process
if (!syncNodeInRaftGroup(ths, &(pMsg->srcId))) {
syncLogRecvAppendEntriesReply(ths, pMsg, "not in my config");
return 0;
}
// drop stale response
if (pMsg->term < ths->raftStore.currentTerm) {
syncLogRecvAppendEntriesReply(ths, pMsg, "drop stale response");
return 0;
}
if (ths->state == TAOS_SYNC_STATE_LEADER) {
if (pMsg->term > ths->raftStore.currentTerm) {
syncLogRecvAppendEntriesReply(ths, pMsg, "error term");
syncNodeStepDown(ths, pMsg->term);
return -1;
}
ASSERT(pMsg->term == ths->raftStore.currentTerm);
if (pMsg->success) {
SyncIndex oldMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId));
if (pMsg->matchIndex > oldMatchIndex) {
syncIndexMgrSetIndex(ths->pMatchIndex, &(pMsg->srcId), pMsg->matchIndex);
syncMaybeAdvanceCommitIndex(ths);
// maybe update minMatchIndex
ths->minMatchIndex = syncMinMatchIndex(ths);
}
syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), pMsg->matchIndex + 1);
} else {
SyncIndex nextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId));
if (nextIndex > SYNC_INDEX_BEGIN) {
--nextIndex;
}
syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), nextIndex);
}
// send next append entries
SPeerState* pState = syncNodeGetPeerState(ths, &(pMsg->srcId));
ASSERT(pState != NULL);
if (pMsg->lastSendIndex == pState->lastSendIndex) {
int64_t timeNow = taosGetTimestampMs();
int64_t elapsed = timeNow - pState->lastSendTime;
sNTrace(ths, "sync-append-entries rtt elapsed:%" PRId64 ", index:%" PRId64, elapsed, pState->lastSendIndex);
syncNodeReplicateOne(ths, &(pMsg->srcId), true);
}
}
syncLogRecvAppendEntriesReply(ths, pMsg, "process");
return 0;
}
......@@ -43,148 +43,6 @@
// IN commitIndex' = [commitIndex EXCEPT ![i] = newCommitIndex]
// /\ UNCHANGED <<messages, serverVars, candidateVars, leaderVars, log>>
//
void syncOneReplicaAdvance(SSyncNode* pSyncNode) {
ASSERT(false && "deprecated");
if (pSyncNode == NULL) {
sError("pSyncNode is NULL");
return;
}
if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) {
sNError(pSyncNode, "not leader, can not advance commit index");
return;
}
if (pSyncNode->replicaNum != 1) {
sNError(pSyncNode, "not one replica, can not advance commit index");
return;
}
// advance commit index to snapshot first
SSnapshot snapshot;
pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapshot);
if (snapshot.lastApplyIndex > 0 && snapshot.lastApplyIndex > pSyncNode->commitIndex) {
SyncIndex commitBegin = pSyncNode->commitIndex;
SyncIndex commitEnd = snapshot.lastApplyIndex;
pSyncNode->commitIndex = snapshot.lastApplyIndex;
sNTrace(pSyncNode, "commit by snapshot from index:%" PRId64 " to index:%" PRId64, commitBegin, commitEnd);
}
// advance commit index as large as possible
SyncIndex lastIndex = syncNodeGetLastIndex(pSyncNode);
if (lastIndex > pSyncNode->commitIndex) {
sNTrace(pSyncNode, "commit by wal from index:%" PRId64 " to index:%" PRId64, pSyncNode->commitIndex + 1, lastIndex);
pSyncNode->commitIndex = lastIndex;
}
// call back Wal
SyncIndex walCommitVer = logStoreWalCommitVer(pSyncNode->pLogStore);
if (pSyncNode->commitIndex > walCommitVer) {
pSyncNode->pLogStore->syncLogUpdateCommitIndex(pSyncNode->pLogStore, pSyncNode->commitIndex);
}
}
void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) {
ASSERTS(false, "deprecated");
if (pSyncNode == NULL) {
sError("pSyncNode is NULL");
return;
}
if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) {
sNError(pSyncNode, "not leader, can not advance commit index");
return;
}
// advance commit index to sanpshot first
SSnapshot snapshot;
pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapshot);
if (snapshot.lastApplyIndex > 0 && snapshot.lastApplyIndex > pSyncNode->commitIndex) {
SyncIndex commitBegin = pSyncNode->commitIndex;
SyncIndex commitEnd = snapshot.lastApplyIndex;
pSyncNode->commitIndex = snapshot.lastApplyIndex;
sNTrace(pSyncNode, "commit by snapshot from index:%" PRId64 " to index:%" PRId64, commitBegin, commitEnd);
}
// update commit index
SyncIndex newCommitIndex = pSyncNode->commitIndex;
for (SyncIndex index = syncNodeGetLastIndex(pSyncNode); index > pSyncNode->commitIndex; --index) {
bool agree = syncAgree(pSyncNode, index);
if (agree) {
// term
SSyncRaftEntry* pEntry = NULL;
SLRUCache* pCache = pSyncNode->pLogStore->pCache;
LRUHandle* h = taosLRUCacheLookup(pCache, &index, sizeof(index));
if (h) {
pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h);
pSyncNode->pLogStore->cacheHit++;
sNTrace(pSyncNode, "hit cache index:%" PRId64 ", bytes:%u, %p", index, pEntry->bytes, pEntry);
} else {
pSyncNode->pLogStore->cacheMiss++;
sNTrace(pSyncNode, "miss cache index:%" PRId64, index);
int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, index, &pEntry);
if (code != 0) {
sNError(pSyncNode, "advance commit index error, read wal index:%" PRId64, index);
return;
}
}
// cannot commit, even if quorum agree. need check term!
if (pEntry->term <= pSyncNode->raftStore.currentTerm) {
// update commit index
newCommitIndex = index;
if (h) {
taosLRUCacheRelease(pCache, h, false);
} else {
syncEntryDestroy(pEntry);
}
break;
} else {
sNTrace(pSyncNode, "can not commit due to term not equal, index:%" PRId64 ", term:%" PRIu64, pEntry->index,
pEntry->term);
}
if (h) {
taosLRUCacheRelease(pCache, h, false);
} else {
syncEntryDestroy(pEntry);
}
}
}
// advance commit index as large as possible
SyncIndex walCommitVer = logStoreWalCommitVer(pSyncNode->pLogStore);
if (walCommitVer > newCommitIndex) {
newCommitIndex = walCommitVer;
}
// maybe execute fsm
if (newCommitIndex > pSyncNode->commitIndex) {
SyncIndex beginIndex = pSyncNode->commitIndex + 1;
SyncIndex endIndex = newCommitIndex;
// update commit index
pSyncNode->commitIndex = newCommitIndex;
// call back Wal
pSyncNode->pLogStore->syncLogUpdateCommitIndex(pSyncNode->pLogStore, pSyncNode->commitIndex);
// execute fsm
if (pSyncNode != NULL && pSyncNode->pFsm != NULL) {
int32_t code = syncNodeDoCommit(pSyncNode, beginIndex, endIndex, pSyncNode->state);
if (code != 0) {
sNError(pSyncNode, "advance commit index error, do commit begin:%" PRId64 ", end:%" PRId64, beginIndex,
endIndex);
return;
}
}
}
}
bool syncAgreeIndex(SSyncNode* pSyncNode, SRaftId* pRaftId, SyncIndex index) {
// I am leader, I agree
......@@ -210,83 +68,7 @@ static inline int64_t syncNodeAbs64(int64_t a, int64_t b) {
return c;
}
int32_t syncNodeDynamicQuorum(const SSyncNode* pSyncNode) {
return pSyncNode->quorum;
#if 0
int32_t quorum = 1; // self
int64_t timeNow = taosGetTimestampMs();
for (int i = 0; i < pSyncNode->peersNum; ++i) {
int64_t peerStartTime = syncIndexMgrGetStartTime(pSyncNode->pNextIndex, &(pSyncNode->peersId)[i]);
int64_t peerRecvTime = syncIndexMgrGetRecvTime(pSyncNode->pNextIndex, &(pSyncNode->peersId)[i]);
SyncIndex peerMatchIndex = syncIndexMgrGetIndex(pSyncNode->pMatchIndex, &(pSyncNode->peersId)[i]);
int64_t recvTimeDiff = TABS(peerRecvTime - timeNow);
int64_t startTimeDiff = TABS(peerStartTime - pSyncNode->startTime);
int64_t logDiff = TABS(peerMatchIndex - syncNodeGetLastIndex(pSyncNode));
/*
int64_t recvTimeDiff = syncNodeAbs64(peerRecvTime, timeNow);
int64_t startTimeDiff = syncNodeAbs64(peerStartTime, pSyncNode->startTime);
int64_t logDiff = syncNodeAbs64(peerMatchIndex, syncNodeGetLastIndex(pSyncNode));
*/
int32_t addQuorum = 0;
if (recvTimeDiff < SYNC_MAX_RECV_TIME_RANGE_MS) {
if (startTimeDiff < SYNC_MAX_START_TIME_RANGE_MS) {
addQuorum = 1;
} else {
if (logDiff < SYNC_ADD_QUORUM_COUNT) {
addQuorum = 1;
} else {
addQuorum = 0;
}
}
} else {
addQuorum = 0;
}
/*
if (recvTimeDiff < SYNC_MAX_RECV_TIME_RANGE_MS) {
addQuorum = 1;
} else {
addQuorum = 0;
}
if (startTimeDiff > SYNC_MAX_START_TIME_RANGE_MS) {
addQuorum = 0;
}
*/
quorum += addQuorum;
}
ASSERT(quorum <= pSyncNode->replicaNum);
if (quorum < pSyncNode->quorum) {
quorum = pSyncNode->quorum;
}
return quorum;
#endif
}
/*
bool syncAgree(SSyncNode* pSyncNode, SyncIndex index) {
int agreeCount = 0;
for (int i = 0; i < pSyncNode->replicaNum; ++i) {
if (syncAgreeIndex(pSyncNode, &(pSyncNode->replicasId[i]), index)) {
++agreeCount;
}
if (agreeCount >= syncNodeDynamicQuorum(pSyncNode)) {
return true;
}
}
return false;
}
*/
int32_t syncNodeDynamicQuorum(const SSyncNode* pSyncNode) { return pSyncNode->quorum; }
bool syncNodeAgreedUpon(SSyncNode* pNode, SyncIndex index) {
int count = 0;
......
......@@ -43,7 +43,10 @@ static int32_t syncNodeRequestVotePeers(SSyncNode* pNode) {
for (int i = 0; i < pNode->peersNum; ++i) {
SRpcMsg rpcMsg = {0};
ret = syncBuildRequestVote(&rpcMsg, pNode->vgId);
ASSERT(ret == 0);
if (ret < 0) {
sError("vgId:%d, failed to build request-vote msg since %s", pNode->vgId, terrstr());
continue;
}
SyncRequestVote* pMsg = rpcMsg.pCont;
pMsg->srcId = pNode->myRaftId;
......@@ -51,13 +54,18 @@ static int32_t syncNodeRequestVotePeers(SSyncNode* pNode) {
pMsg->term = pNode->raftStore.currentTerm;
ret = syncNodeGetLastIndexTerm(pNode, &pMsg->lastLogIndex, &pMsg->lastLogTerm);
ASSERT(ret == 0);
if (ret < 0) {
sError("vgId:%d, failed to get index and term of last log since %s", pNode->vgId, terrstr());
continue;
}
ret = syncNodeSendMsgById(&pNode->peersId[i], pNode, &rpcMsg);
ASSERT(ret == 0);
if (ret < 0) {
sError("vgId:%d, failed to send msg to peerId:%" PRId64, pNode->vgId, pNode->peersId[i].addr);
continue;
}
}
return ret;
return 0;
}
int32_t syncNodeElect(SSyncNode* pSyncNode) {
......
......@@ -292,8 +292,6 @@ int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex) {
goto _DEL_WAL;
} else {
lastApplyIndex -= SYNC_VNODE_LOG_RETENTION;
SyncIndex beginIndex = pSyncNode->pLogStore->syncLogBeginIndex(pSyncNode->pLogStore);
SyncIndex endIndex = pSyncNode->pLogStore->syncLogEndIndex(pSyncNode->pLogStore);
bool isEmpty = pSyncNode->pLogStore->syncLogIsEmpty(pSyncNode->pLogStore);
......@@ -308,6 +306,8 @@ int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex) {
if (pSyncNode->replicaNum > 1) {
// multi replicas
lastApplyIndex = TMAX(lastApplyIndex - SYNC_VNODE_LOG_RETENTION, beginIndex - 1);
if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) {
pSyncNode->minMatchIndex = syncMinMatchIndex(pSyncNode);
......@@ -586,78 +586,6 @@ SSyncState syncGetState(int64_t rid) {
return state;
}
#if 0
int32_t syncGetSnapshotByIndex(int64_t rid, SyncIndex index, SSnapshot* pSnapshot) {
if (index < SYNC_INDEX_BEGIN) {
return -1;
}
SSyncNode* pSyncNode = syncNodeAcquire(rid);
if (pSyncNode == NULL) {
return -1;
}
ASSERT(rid == pSyncNode->rid);
SSyncRaftEntry* pEntry = NULL;
int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, index, &pEntry);
if (code != 0) {
if (pEntry != NULL) {
syncEntryDestroy(pEntry);
}
syncNodeRelease(pSyncNode);
return -1;
}
ASSERT(pEntry != NULL);
pSnapshot->data = NULL;
pSnapshot->lastApplyIndex = index;
pSnapshot->lastApplyTerm = pEntry->term;
pSnapshot->lastConfigIndex = syncNodeGetSnapshotConfigIndex(pSyncNode, index);
syncEntryDestroy(pEntry);
syncNodeRelease(pSyncNode);
return 0;
}
int32_t syncGetSnapshotMeta(int64_t rid, struct SSnapshotMeta* sMeta) {
SSyncNode* pSyncNode = syncNodeAcquire(rid);
if (pSyncNode == NULL) {
return -1;
}
ASSERT(rid == pSyncNode->rid);
sMeta->lastConfigIndex = pSyncNode->raftCfg.lastConfigIndex;
sTrace("vgId:%d, get snapshot meta, lastConfigIndex:%" PRId64, pSyncNode->vgId, pSyncNode->raftCfg.lastConfigIndex);
syncNodeRelease(pSyncNode);
return 0;
}
int32_t syncGetSnapshotMetaByIndex(int64_t rid, SyncIndex snapshotIndex, struct SSnapshotMeta* sMeta) {
SSyncNode* pSyncNode = syncNodeAcquire(rid);
if (pSyncNode == NULL) {
return -1;
}
ASSERT(rid == pSyncNode->rid);
ASSERT(pSyncNode->raftCfg.configIndexCount >= 1);
SyncIndex lastIndex = (pSyncNode->raftCfg.configIndexArr)[0];
for (int32_t i = 0; i < pSyncNode->raftCfg.configIndexCount; ++i) {
if ((pSyncNode->raftCfg.configIndexArr)[i] > lastIndex &&
(pSyncNode->raftCfg.configIndexArr)[i] <= snapshotIndex) {
lastIndex = (pSyncNode->raftCfg.configIndexArr)[i];
}
}
sMeta->lastConfigIndex = lastIndex;
sTrace("vgId:%d, get snapshot meta by index:%" PRId64 " lcindex:%" PRId64, pSyncNode->vgId, snapshotIndex,
sMeta->lastConfigIndex);
syncNodeRelease(pSyncNode);
return 0;
}
#endif
SyncIndex syncNodeGetSnapshotConfigIndex(SSyncNode* pSyncNode, SyncIndex snapshotLastApplyIndex) {
ASSERT(pSyncNode->raftCfg.configIndexCount >= 1);
SyncIndex lastIndex = (pSyncNode->raftCfg.configIndexArr)[0];
......@@ -1031,9 +959,12 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
pSyncNode->commitIndex = commitIndex;
sInfo("vgId:%d, sync node commitIndex initialized as %" PRId64, pSyncNode->vgId, pSyncNode->commitIndex);
// restore log store on need
if (syncNodeLogStoreRestoreOnNeed(pSyncNode) < 0) {
sError("vgId:%d, failed to restore log store since %s.", pSyncNode->vgId, terrstr());
goto _error;
}
// timer ms init
pSyncNode->pingBaseLine = PING_TIMER_MS;
pSyncNode->electBaseLine = tsElectInterval;
......@@ -1096,10 +1027,16 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
pSyncNode->changing = false;
// replication mgr
syncNodeLogReplMgrInit(pSyncNode);
if (syncNodeLogReplMgrInit(pSyncNode) < 0) {
sError("vgId:%d, failed to init repl mgr since %s.", pSyncNode->vgId, terrstr());
goto _error;
}
// peer state
syncNodePeerStateInit(pSyncNode);
if (syncNodePeerStateInit(pSyncNode) < 0) {
sError("vgId:%d, failed to init peer stat since %s.", pSyncNode->vgId, terrstr());
goto _error;
}
//
// min match index
......@@ -1194,27 +1131,10 @@ int32_t syncNodeStart(SSyncNode* pSyncNode) {
int32_t ret = 0;
ret = syncNodeStartPingTimer(pSyncNode);
ASSERT(ret == 0);
return ret;
}
void syncNodeStartOld(SSyncNode* pSyncNode) {
// start raft
if (pSyncNode->replicaNum == 1) {
raftStoreNextTerm(pSyncNode);
syncNodeBecomeLeader(pSyncNode, "one replica start");
// Raft 3.6.2 Committing entries from previous terms
syncNodeAppendNoop(pSyncNode);
syncMaybeAdvanceCommitIndex(pSyncNode);
} else {
syncNodeBecomeFollower(pSyncNode, "first start");
if (ret != 0) {
sError("vgId:%d, failed to start ping timer since %s", pSyncNode->vgId, terrstr());
}
int32_t ret = 0;
ret = syncNodeStartPingTimer(pSyncNode);
ASSERT(ret == 0);
return ret;
}
int32_t syncNodeStartStandBy(SSyncNode* pSyncNode) {
......@@ -1225,11 +1145,16 @@ int32_t syncNodeStartStandBy(SSyncNode* pSyncNode) {
// reset elect timer, long enough
int32_t electMS = TIMER_MAX_MS;
int32_t ret = syncNodeRestartElectTimer(pSyncNode, electMS);
ASSERT(ret == 0);
if (ret < 0) {
sError("vgId:%d, failed to restart elect timer since %s", pSyncNode->vgId, terrstr());
return -1;
}
ret = 0;
ret = syncNodeStartPingTimer(pSyncNode);
ASSERT(ret == 0);
if (ret < 0) {
sError("vgId:%d, failed to start ping timer since %s", pSyncNode->vgId, terrstr());
return -1;
}
return ret;
}
......@@ -1818,12 +1743,6 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) {
pSyncNode->leaderCache = pSyncNode->myRaftId;
for (int32_t i = 0; i < pSyncNode->pNextIndex->replicaNum; ++i) {
// maybe overwrite myself, no harm
// just do it!
// pSyncNode->pNextIndex->index[i] = pSyncNode->pLogStore->getLastIndex(pSyncNode->pLogStore) + 1;
// maybe wal is deleted
SyncIndex lastIndex;
SyncTerm lastTerm;
int32_t code = syncNodeGetLastIndexTerm(pSyncNode, &lastIndex, &lastTerm);
......@@ -1885,7 +1804,11 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) {
void syncNodeCandidate2Leader(SSyncNode* pSyncNode) {
ASSERT(pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE);
ASSERT(voteGrantedMajority(pSyncNode->pVotesGranted));
bool granted = voteGrantedMajority(pSyncNode->pVotesGranted);
if (!granted) {
sError("vgId:%d, not granted by majority.", pSyncNode->vgId);
return;
}
syncNodeBecomeLeader(pSyncNode, "candidate to leader");
sNTrace(pSyncNode, "state change syncNodeCandidate2Leader");
......@@ -1901,20 +1824,6 @@ void syncNodeCandidate2Leader(SSyncNode* pSyncNode) {
pSyncNode->vgId, pSyncNode->raftStore.currentTerm, pSyncNode->commitIndex, lastIndex);
}
void syncNodeCandidate2LeaderOld(SSyncNode* pSyncNode) {
ASSERT(pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE);
ASSERT(voteGrantedMajority(pSyncNode->pVotesGranted));
syncNodeBecomeLeader(pSyncNode, "candidate to leader");
// Raft 3.6.2 Committing entries from previous terms
syncNodeAppendNoop(pSyncNode);
syncMaybeAdvanceCommitIndex(pSyncNode);
if (pSyncNode->replicaNum > 1) {
syncNodeReplicate(pSyncNode);
}
}
bool syncNodeIsMnode(SSyncNode* pSyncNode) { return (pSyncNode->vgId == 1); }
int32_t syncNodePeerStateInit(SSyncNode* pSyncNode) {
......@@ -1960,7 +1869,8 @@ void syncNodeCandidate2Follower(SSyncNode* pSyncNode) {
// need assert
void syncNodeVoteForTerm(SSyncNode* pSyncNode, SyncTerm term, SRaftId* pRaftId) {
ASSERT(term == pSyncNode->raftStore.currentTerm);
ASSERT(!raftStoreHasVoted(pSyncNode));
bool voted = raftStoreHasVoted(pSyncNode);
ASSERT(!voted);
raftStoreVote(pSyncNode, pRaftId);
}
......@@ -2638,24 +2548,6 @@ int32_t syncNodeOnLocalCmd(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
return 0;
}
int32_t syncNodeOnLocalCmdOld(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
ASSERT(false && "deprecated");
SyncLocalCmd* pMsg = pRpcMsg->pCont;
syncLogRecvLocalCmd(ths, pMsg, "");
if (pMsg->cmd == SYNC_LOCAL_CMD_STEP_DOWN) {
syncNodeStepDown(ths, pMsg->currentTerm);
} else if (pMsg->cmd == SYNC_LOCAL_CMD_FOLLOWER_CMT) {
syncNodeFollowerCommit(ths, pMsg->commitIndex);
} else {
sError("error local cmd");
}
return 0;
}
// TLA+ Spec
// ClientRequest(i, v) ==
// /\ state[i] = Leader
......@@ -2700,96 +2592,6 @@ int32_t syncNodeOnClientRequest(SSyncNode* ths, SRpcMsg* pMsg, SyncIndex* pRetIn
}
}
int32_t syncNodeOnClientRequestOld(SSyncNode* ths, SRpcMsg* pMsg, SyncIndex* pRetIndex) {
sNTrace(ths, "on client request");
int32_t ret = 0;
int32_t code = 0;
SyncIndex index = ths->pLogStore->syncLogWriteIndex(ths->pLogStore);
SyncTerm term = ths->raftStore.currentTerm;
SSyncRaftEntry* pEntry;
if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) {
pEntry = syncEntryBuildFromClientRequest(pMsg->pCont, term, index);
} else {
pEntry = syncEntryBuildFromRpcMsg(pMsg, term, index);
}
LRUHandle* h = NULL;
if (ths->state == TAOS_SYNC_STATE_LEADER) {
// append entry
code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pEntry, false);
if (code != 0) {
if (ths->replicaNum == 1) {
if (h) {
taosLRUCacheRelease(ths->pLogStore->pCache, h, false);
} else {
syncEntryDestroy(pEntry);
}
return -1;
} else {
// del resp mgr, call FpCommitCb
SFsmCbMeta cbMeta = {
.index = pEntry->index,
.lastConfigIndex = SYNC_INDEX_INVALID,
.isWeak = pEntry->isWeak,
.code = -1,
.state = ths->state,
.seqNum = pEntry->seqNum,
.term = pEntry->term,
.currentTerm = ths->raftStore.currentTerm,
.flag = 0,
};
ths->pFsm->FpCommitCb(ths->pFsm, pMsg, &cbMeta);
if (h) {
taosLRUCacheRelease(ths->pLogStore->pCache, h, false);
} else {
syncEntryDestroy(pEntry);
}
return -1;
}
}
syncCacheEntry(ths->pLogStore, pEntry, &h);
// if mulit replica, start replicate right now
if (ths->replicaNum > 1) {
syncNodeReplicate(ths);
}
// if only myself, maybe commit right now
if (ths->replicaNum == 1) {
if (syncNodeIsMnode(ths)) {
syncMaybeAdvanceCommitIndex(ths);
} else {
syncOneReplicaAdvance(ths);
}
}
}
if (pRetIndex != NULL) {
if (ret == 0 && pEntry != NULL) {
*pRetIndex = pEntry->index;
} else {
*pRetIndex = SYNC_INDEX_INVALID;
}
}
if (h) {
taosLRUCacheRelease(ths->pLogStore->pCache, h, false);
} else {
syncEntryDestroy(pEntry);
}
return ret;
}
const char* syncStr(ESyncState state) {
switch (state) {
case TAOS_SYNC_STATE_FOLLOWER:
......@@ -2894,129 +2696,6 @@ bool syncNodeIsOptimizedOneReplica(SSyncNode* ths, SRpcMsg* pMsg) {
return (ths->replicaNum == 1 && syncUtilUserCommit(pMsg->msgType) && ths->vgId != 1);
}
int32_t syncNodeDoCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, uint64_t flag) {
ASSERT(false);
if (beginIndex > endIndex) {
return 0;
}
if (ths == NULL) {
return -1;
}
if (ths->pFsm != NULL && ths->pFsm->FpGetSnapshotInfo != NULL) {
// advance commit index to sanpshot first
SSnapshot snapshot = {0};
ths->pFsm->FpGetSnapshotInfo(ths->pFsm, &snapshot);
if (snapshot.lastApplyIndex >= 0 && snapshot.lastApplyIndex >= beginIndex) {
sNTrace(ths, "commit by snapshot from index:%" PRId64 " to index:%" PRId64, beginIndex, snapshot.lastApplyIndex);
// update begin index
beginIndex = snapshot.lastApplyIndex + 1;
}
}
int32_t code = 0;
ESyncState state = flag;
sNTrace(ths, "commit by wal from index:%" PRId64 " to index:%" PRId64, beginIndex, endIndex);
// execute fsm
if (ths->pFsm != NULL) {
for (SyncIndex i = beginIndex; i <= endIndex; ++i) {
if (i != SYNC_INDEX_INVALID) {
SSyncRaftEntry* pEntry;
SLRUCache* pCache = ths->pLogStore->pCache;
LRUHandle* h = taosLRUCacheLookup(pCache, &i, sizeof(i));
if (h) {
pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h);
ths->pLogStore->cacheHit++;
sNTrace(ths, "hit cache index:%" PRId64 ", bytes:%u, %p", i, pEntry->bytes, pEntry);
} else {
ths->pLogStore->cacheMiss++;
sNTrace(ths, "miss cache index:%" PRId64, i);
code = ths->pLogStore->syncLogGetEntry(ths->pLogStore, i, &pEntry);
// ASSERT(code == 0);
// ASSERT(pEntry != NULL);
if (code != 0 || pEntry == NULL) {
sNError(ths, "get log entry error");
sFatal("vgId:%d, get log entry %" PRId64 " error when commit since %s", ths->vgId, i, terrstr());
continue;
}
}
SRpcMsg rpcMsg = {0};
syncEntry2OriginalRpc(pEntry, &rpcMsg);
sTrace("do commit index:%" PRId64 ", type:%s", i, TMSG_INFO(pEntry->msgType));
// user commit
if ((ths->pFsm->FpCommitCb != NULL) && syncUtilUserCommit(pEntry->originalRpcType)) {
bool internalExecute = true;
if ((ths->replicaNum == 1) && ths->restoreFinish && ths->vgId != 1) {
internalExecute = false;
}
sNTrace(ths, "user commit index:%" PRId64 ", internal:%d, type:%s", i, internalExecute,
TMSG_INFO(pEntry->msgType));
// execute fsm in apply thread, or execute outside syncPropose
if (internalExecute) {
SFsmCbMeta cbMeta = {
.index = pEntry->index,
.lastConfigIndex = syncNodeGetSnapshotConfigIndex(ths, pEntry->index),
.isWeak = pEntry->isWeak,
.code = 0,
.state = ths->state,
.seqNum = pEntry->seqNum,
.term = pEntry->term,
.currentTerm = ths->raftStore.currentTerm,
.flag = flag,
};
syncRespMgrGetAndDel(ths->pSyncRespMgr, cbMeta.seqNum, &rpcMsg.info);
ths->pFsm->FpCommitCb(ths->pFsm, &rpcMsg, &cbMeta);
}
}
#if 0
// execute in pre-commit
// leader transfer
if (pEntry->originalRpcType == TDMT_SYNC_LEADER_TRANSFER) {
code = syncDoLeaderTransfer(ths, &rpcMsg, pEntry);
ASSERT(code == 0);
}
#endif
// restore finish
// if only snapshot, a noop entry will be append, so syncLogLastIndex is always ok
if (pEntry->index == ths->pLogStore->syncLogLastIndex(ths->pLogStore)) {
if (ths->restoreFinish == false) {
if (ths->pFsm->FpRestoreFinishCb != NULL) {
ths->pFsm->FpRestoreFinishCb(ths->pFsm);
}
ths->restoreFinish = true;
int64_t restoreDelay = taosGetTimestampMs() - ths->leaderTime;
sNTrace(ths, "restore finish, index:%" PRId64 ", elapsed:%" PRId64 " ms", pEntry->index, restoreDelay);
}
}
rpcFreeCont(rpcMsg.pCont);
if (h) {
taosLRUCacheRelease(pCache, h, false);
} else {
syncEntryDestroy(pEntry);
}
}
}
}
return 0;
}
bool syncNodeInRaftGroup(SSyncNode* ths, SRaftId* pRaftId) {
for (int32_t i = 0; i < ths->replicaNum; ++i) {
if (syncUtilSameId(&((ths->replicasId)[i]), pRaftId)) {
......
......@@ -945,8 +945,11 @@ int32_t syncNodeLogReplMgrInit(SSyncNode* pNode) {
for (int i = 0; i < TSDB_MAX_REPLICA; i++) {
ASSERT(pNode->logReplMgrs[i] == NULL);
pNode->logReplMgrs[i] = syncLogReplMgrCreate();
if (pNode->logReplMgrs[i] == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
}
pNode->logReplMgrs[i]->peerId = i;
ASSERTS(pNode->logReplMgrs[i] != NULL, "Out of memory.");
}
return 0;
}
......
......@@ -48,92 +48,6 @@
int32_t syncNodeMaybeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, SRpcMsg* pRpcMsg);
int32_t syncNodeReplicateOne(SSyncNode* pSyncNode, SRaftId* pDestId, bool snapshot) {
ASSERT(false && "deprecated");
// next index
SyncIndex nextIndex = syncIndexMgrGetIndex(pSyncNode->pNextIndex, pDestId);
if (snapshot) {
// maybe start snapshot
SyncIndex logStartIndex = pSyncNode->pLogStore->syncLogBeginIndex(pSyncNode->pLogStore);
SyncIndex logEndIndex = pSyncNode->pLogStore->syncLogEndIndex(pSyncNode->pLogStore);
if (nextIndex < logStartIndex || nextIndex - 1 > logEndIndex) {
sNTrace(pSyncNode, "maybe start snapshot for next-index:%" PRId64 ", start:%" PRId64 ", end:%" PRId64, nextIndex,
logStartIndex, logEndIndex);
// start snapshot
int32_t code = syncNodeStartSnapshot(pSyncNode, pDestId);
}
}
// pre index, pre term
SyncIndex preLogIndex = syncNodeGetPreIndex(pSyncNode, nextIndex);
SyncTerm preLogTerm = syncNodeGetPreTerm(pSyncNode, nextIndex);
// prepare entry
SRpcMsg rpcMsg = {0};
SyncAppendEntries* pMsg = NULL;
SSyncRaftEntry* pEntry = NULL;
SLRUCache* pCache = pSyncNode->pLogStore->pCache;
LRUHandle* h = taosLRUCacheLookup(pCache, &nextIndex, sizeof(nextIndex));
int32_t code = 0;
if (h) {
pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h);
code = 0;
pSyncNode->pLogStore->cacheHit++;
sNTrace(pSyncNode, "hit cache index:%" PRId64 ", bytes:%u, %p", nextIndex, pEntry->bytes, pEntry);
} else {
pSyncNode->pLogStore->cacheMiss++;
sNTrace(pSyncNode, "miss cache index:%" PRId64, nextIndex);
code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, nextIndex, &pEntry);
}
if (code == 0) {
ASSERT(pEntry != NULL);
code = syncBuildAppendEntries(&rpcMsg, (int32_t)(pEntry->bytes), pSyncNode->vgId);
ASSERT(code == 0);
pMsg = rpcMsg.pCont;
memcpy(pMsg->data, pEntry, pEntry->bytes);
} else {
if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) {
// no entry in log
code = syncBuildAppendEntries(&rpcMsg, 0, pSyncNode->vgId);
ASSERT(code == 0);
pMsg = rpcMsg.pCont;
} else {
sNError(pSyncNode, "replicate to dnode:%d error, next-index:%" PRId64, DID(pDestId), nextIndex);
return -1;
}
}
if (h) {
taosLRUCacheRelease(pCache, h, false);
} else {
syncEntryDestroy(pEntry);
}
// prepare msg
ASSERT(pMsg != NULL);
pMsg->srcId = pSyncNode->myRaftId;
pMsg->destId = *pDestId;
pMsg->term = pSyncNode->raftStore.currentTerm;
pMsg->prevLogIndex = preLogIndex;
pMsg->prevLogTerm = preLogTerm;
pMsg->commitIndex = pSyncNode->commitIndex;
pMsg->privateTerm = 0;
// pMsg->privateTerm = syncIndexMgrGetTerm(pSyncNode->pNextIndex, pDestId);
// send msg
syncNodeMaybeSendAppendEntries(pSyncNode, pDestId, &rpcMsg);
return 0;
}
int32_t syncNodeReplicate(SSyncNode* pNode) {
SSyncLogBuffer* pBuf = pNode->pLogBuf;
taosThreadMutexLock(&pBuf->mutex);
......@@ -156,25 +70,6 @@ int32_t syncNodeReplicateWithoutLock(SSyncNode* pNode) {
return 0;
}
int32_t syncNodeReplicateOld(SSyncNode* pSyncNode) {
if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) {
return -1;
}
sNTrace(pSyncNode, "do replicate");
int32_t ret = 0;
for (int i = 0; i < pSyncNode->peersNum; ++i) {
SRaftId* pDestId = &(pSyncNode->peersId[i]);
ret = syncNodeReplicateOne(pSyncNode, pDestId, true);
if (ret != 0) {
sError("vgId:%d, do append entries error for dnode:%d", pSyncNode->vgId, DID(pDestId));
}
}
return 0;
}
int32_t syncNodeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, SRpcMsg* pRpcMsg) {
SyncAppendEntries* pMsg = pRpcMsg->pCont;
pMsg->destId = *destRaftId;
......@@ -182,39 +77,6 @@ int32_t syncNodeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftI
return 0;
}
int32_t syncNodeSendAppendEntriesOld(SSyncNode* pSyncNode, const SRaftId* destRaftId, SRpcMsg* pRpcMsg) {
int32_t ret = 0;
SyncAppendEntries* pMsg = pRpcMsg->pCont;
if (pMsg == NULL) {
sError("vgId:%d, sync-append-entries msg is NULL", pSyncNode->vgId);
return 0;
}
SPeerState* pState = syncNodeGetPeerState(pSyncNode, destRaftId);
if (pState == NULL) {
sError("vgId:%d, replica maybe dropped", pSyncNode->vgId);
return 0;
}
// save index, otherwise pMsg will be free by rpc
SyncIndex saveLastSendIndex = pState->lastSendIndex;
bool update = false;
if (pMsg->dataLen > 0) {
saveLastSendIndex = pMsg->prevLogIndex + 1;
update = true;
}
syncLogSendAppendEntries(pSyncNode, pMsg, "");
syncNodeSendMsgById(destRaftId, pSyncNode, pRpcMsg);
if (update) {
pState->lastSendIndex = saveLastSendIndex;
pState->lastSendTime = taosGetTimestampMs();
}
return ret;
}
int32_t syncNodeMaybeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, SRpcMsg* pRpcMsg) {
int32_t ret = 0;
SyncAppendEntries* pMsg = pRpcMsg->pCont;
......
......@@ -325,6 +325,35 @@ bool walLogEntriesComplete(const SWal* pWal) {
return complete;
}
int walTrimIdxFile(SWal* pWal, int32_t fileIdx) {
SWalFileInfo* pFileInfo = taosArrayGet(pWal->fileInfoSet, fileIdx);
ASSERT(pFileInfo != NULL);
char fnameStr[WAL_FILE_LEN];
walBuildIdxName(pWal, pFileInfo->firstVer, fnameStr);
int64_t fileSize = 0;
taosStatFile(fnameStr, &fileSize, NULL);
int64_t records = TMAX(0, pFileInfo->lastVer - pFileInfo->firstVer + 1);
int64_t lastEndOffset = records * sizeof(SWalIdxEntry);
if (fileSize <= lastEndOffset) {
return 0;
}
TdFilePtr pFile = taosOpenFile(fnameStr, TD_FILE_READ | TD_FILE_WRITE);
if (pFile == NULL) {
terrno = TAOS_SYSTEM_ERROR(errno);
return -1;
}
wInfo("vgId:%d, trim idx file. file: %s, size: %" PRId64 ", offset: %" PRId64, pWal->cfg.vgId, fnameStr, fileSize,
lastEndOffset);
taosFtruncateFile(pFile, lastEndOffset);
taosCloseFile(&pFile);
return 0;
}
int walCheckAndRepairMeta(SWal* pWal) {
// load log files, get first/snapshot/last version info
const char* logPattern = "^[0-9]+.log$";
......@@ -402,6 +431,8 @@ int walCheckAndRepairMeta(SWal* pWal) {
}
updateMeta = true;
(void)walTrimIdxFile(pWal, fileIdx);
int64_t lastVer = walScanLogGetLastVer(pWal, fileIdx);
if (lastVer < 0) {
if (terrno != TSDB_CODE_WAL_LOG_NOT_EXIST) {
......@@ -567,6 +598,7 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) {
goto _err;
}
int64_t count = 0;
while (idxEntry.ver < pFileInfo->lastVer) {
ASSERT(idxEntry.ver == ckHead.head.version);
......@@ -578,11 +610,11 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) {
idxEntry.offset, fLogNameStr);
goto _err;
}
wWarn("vgId:%d, wal idx append new entry %" PRId64 " %" PRId64, pWal->cfg.vgId, idxEntry.ver, idxEntry.offset);
if (taosWriteFile(pIdxFile, &idxEntry, sizeof(SWalIdxEntry)) < 0) {
wError("vgId:%d, failed to append file since %s. file:%s", pWal->cfg.vgId, terrstr(), fnameStr);
goto _err;
}
count++;
}
if (taosFsyncFile(pIdxFile) < 0) {
......@@ -590,6 +622,11 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) {
goto _err;
}
if (count > 0) {
wInfo("vgId:%d, rebuilt %" PRId64 " wal idx entries until lastVer: %" PRId64, pWal->cfg.vgId, count,
pFileInfo->lastVer);
}
(void)taosCloseFile(&pLogFile);
(void)taosCloseFile(&pIdxFile);
return 0;
......
......@@ -77,6 +77,31 @@ void walUnrefVer(SWalRef *pRef) {
}
#endif
SWalRef *walRefFirstVer(SWal *pWal, SWalRef *pRef) {
if (pRef == NULL) {
pRef = walOpenRef(pWal);
if (pRef == NULL) {
return NULL;
}
}
taosThreadMutexLock(&pWal->mutex);
int64_t ver = walGetFirstVer(pWal);
wDebug("vgId:%d, wal ref version %" PRId64 " for first", pWal->cfg.vgId, ver);
pRef->refVer = ver;
// bsearch in fileSet
SWalFileInfo tmpInfo;
tmpInfo.firstVer = ver;
SWalFileInfo *pRet = taosArraySearch(pWal->fileInfoSet, &tmpInfo, compareWalFileInfo, TD_LE);
ASSERT(pRet != NULL);
pRef->refFile = pRet->firstVer;
taosThreadMutexUnlock(&pWal->mutex);
return pRef;
}
SWalRef *walRefCommittedVer(SWal *pWal) {
SWalRef *pRef = walOpenRef(pWal);
if (pRef == NULL) {
......@@ -86,6 +111,8 @@ SWalRef *walRefCommittedVer(SWal *pWal) {
int64_t ver = walGetCommittedVer(pWal);
wDebug("vgId:%d, wal ref version %" PRId64 " for committed", pWal->cfg.vgId, ver);
pRef->refVer = ver;
// bsearch in fileSet
SWalFileInfo tmpInfo;
......
......@@ -32,7 +32,18 @@ void swapStr(char* j, char* J, int width) {
}
#endif
int qsortHelper(const void* p1, const void* p2, const void* param) {
__compar_fn_t comparFn = param;
return comparFn(p1, p2);
}
// todo refactor: 1) move away; 2) use merge sort instead; 3) qsort is not a stable sort actually.
void taosSort(void* arr, int64_t sz, int64_t width, __compar_fn_t compar) {
qsort(arr, sz, width, compar);
void taosSort(void* base, int64_t sz, int64_t width, __compar_fn_t compar) {
#ifdef _ALPINE
void* param = compar;
taosqsort(base, width, sz, param, qsortHelper);
#else
qsort(base, sz, width, compar);
#endif
}
......@@ -33,6 +33,11 @@
#include <time.h>
//#define TM_YEAR_BASE 1970 //origin
#define TM_YEAR_BASE 1900 // slguan
// This magic number is the number of 100 nanosecond intervals since January 1, 1601 (UTC)
// until 00:00:00 January 1, 1970
static const uint64_t TIMEEPOCH = ((uint64_t)116444736000000000ULL);
/*
* We do not implement alternate representations. However, we always
* check whether a given modifier is allowed for a certain conversion.
......@@ -341,15 +346,17 @@ char *taosStrpTime(const char *buf, const char *fmt, struct tm *tm) {
int32_t taosGetTimeOfDay(struct timeval *tv) {
#ifdef WINDOWS
time_t t;
t = taosGetTimestampSec();
SYSTEMTIME st;
GetLocalTime(&st);
LARGE_INTEGER t;
FILETIME f;
tv->tv_sec = (long)t;
tv->tv_usec = st.wMilliseconds * 1000;
GetSystemTimeAsFileTime(&f);
t.QuadPart = f.dwHighDateTime;
t.QuadPart <<= 32;
t.QuadPart |= f.dwLowDateTime;
return 0;
t.QuadPart -= TIMEEPOCH;
tv->tv_sec = t.QuadPart / 10000000;
tv->tv_usec = (t.QuadPart % 10000000) / 10;
#else
return gettimeofday(tv, NULL);
#endif
......@@ -550,37 +557,13 @@ int32_t taosClockGetTime(int clock_id, struct timespec *pTS) {
#ifdef WINDOWS
LARGE_INTEGER t;
FILETIME f;
static FILETIME ff;
static SYSTEMTIME ss;
static LARGE_INTEGER offset;
static int8_t offsetInit = 0;
static volatile bool offsetInitFinished = false;
int8_t old = atomic_val_compare_exchange_8(&offsetInit, 0, 1);
if (0 == old) {
ss.wYear = 1970;
ss.wMonth = 1;
ss.wDay = 1;
ss.wHour = 0;
ss.wMinute = 0;
ss.wSecond = 0;
ss.wMilliseconds = 0;
SystemTimeToFileTime(&ss, &ff);
offset.QuadPart = ff.dwHighDateTime;
offset.QuadPart <<= 32;
offset.QuadPart |= ff.dwLowDateTime;
offsetInitFinished = true;
} else {
while (!offsetInitFinished)
; // Ensure initialization is completed.
}
GetSystemTimeAsFileTime(&f);
t.QuadPart = f.dwHighDateTime;
t.QuadPart <<= 32;
t.QuadPart |= f.dwLowDateTime;
t.QuadPart -= offset.QuadPart;
t.QuadPart -= TIMEEPOCH;
pTS->tv_sec = t.QuadPart / 10000000;
pTS->tv_nsec = (t.QuadPart % 10000000) * 100;
return (0);
......
......@@ -41,12 +41,6 @@ static void median(void *src, int64_t size, int64_t s, int64_t e, const void *pa
ASSERT(comparFn(elePtrAt(src, size, mid), elePtrAt(src, size, s), param) <= 0 &&
comparFn(elePtrAt(src, size, s), elePtrAt(src, size, e), param) <= 0);
#ifdef _DEBUG_VIEW
// tTagsPrints(src[s], pOrderDesc->pColumnModel, &pOrderDesc->orderIdx);
// tTagsPrints(src[mid], pOrderDesc->pColumnModel, &pOrderDesc->orderIdx);
// tTagsPrints(src[e], pOrderDesc->pColumnModel, &pOrderDesc->orderIdx);
#endif
}
static void tInsertSort(void *src, int64_t size, int32_t s, int32_t e, const void *param, __ext_compar_fn_t comparFn,
......@@ -278,14 +272,4 @@ void taosheapsort(void *base, int32_t size, int32_t len, const void *parcompar,
}
taosMemoryFree(buf);
/*
char *buf = taosMemoryCalloc(1, size);
for (i = len - 1; i > 0; i--) {
doswap(elePtrAt(base, size, 0), elePtrAt(base, size, i));
taosheapadjust(base, size, 0, i - 1, parcompar, compar, parswap, swap, maxroot);
}
taosMemoryFreeClear(buf);
*/
}
......@@ -23,7 +23,7 @@ python3 bind_param_example.py
# 4
taos -s "drop database power"
python3 multi_bind_example.py
python3 multi_bind_example.py
# 5
python3 query_example.py
......@@ -44,4 +44,43 @@ taos -s "drop database test"
python3 json_protocol_example.py
# 10
# python3 subscribe_demo.py
pip install SQLAlchemy
pip install pandas
taosBenchmark -y -d power -t 10 -n 10
python3 conn_native_pandas.py
python3 conn_rest_pandas.py
taos -s "drop database if exists power"
# 11
taos -s "create database if not exists test"
python3 connect_native_reference.py
# 12
python3 connect_rest_examples.py
# 13
python3 handle_exception.py
# 14
taosBenchmark -y -d power -t 2 -n 10
python3 rest_client_example.py
taos -s "drop database if exists power"
# 15
python3 result_set_examples.py
# 16
python3 tmq_example.py
# 17
python3 sql_writer.py
# 18
python3 mockdatasource.py
# 19
python3 fast_write_example.py
# 20
pip3 install kafka-python
python3 kafka_example.py
......@@ -55,7 +55,7 @@ fi
date
docker run \
-v $REP_MOUNT_PARAM \
--rm --ulimit core=-1 taos_test:v1.0 sh -c "cd $REP_DIR;rm -rf debug;mkdir -p debug;cd debug;cmake .. -DBUILD_HTTP=false -DBUILD_TOOLS=true -DBUILD_TEST=true -DWEBSOCKET=true;make -j || exit 1"
--rm --ulimit core=-1 taos_test:v1.0 sh -c "cd $REP_DIR;rm -rf debug;mkdir -p debug;cd debug;cmake .. -DBUILD_HTTP=false -DBUILD_TOOLS=true -DBUILD_TEST=true -DWEBSOCKET=true -DBUILD_TAOSX=true;make -j || exit 1"
if [[ -d ${WORKDIR}/debugNoSan ]] ;then
echo "delete ${WORKDIR}/debugNoSan"
......@@ -70,7 +70,7 @@ mv ${REP_REAL_PATH}/debug ${WORKDIR}/debugNoSan
date
docker run \
-v $REP_MOUNT_PARAM \
--rm --ulimit core=-1 taos_test:v1.0 sh -c "cd $REP_DIR;rm -rf debug;mkdir -p debug;cd debug;cmake .. -DBUILD_HTTP=false -DBUILD_TOOLS=true -DBUILD_TEST=true -DWEBSOCKET=true -DBUILD_SANITIZER=1 -DTOOLS_SANITIZE=true -DTOOLS_BUILD_TYPE=Debug;make -j || exit 1 "
--rm --ulimit core=-1 taos_test:v1.0 sh -c "cd $REP_DIR;rm -rf debug;mkdir -p debug;cd debug;cmake .. -DBUILD_HTTP=false -DBUILD_TOOLS=true -DBUILD_TEST=true -DWEBSOCKET=true -DBUILD_SANITIZER=1 -DTOOLS_SANITIZE=true -DTOOLS_BUILD_TYPE=Debug -DBUILD_TAOSX=true;make -j || exit 1 "
mv ${REP_REAL_PATH}/debug ${WORKDIR}/debugSan
......
......@@ -79,6 +79,7 @@ sql insert into db.ctb6 values(now, 6, "6")
sql insert into db.ctb7 values(now, 7, "7")
sql insert into db.ctb8 values(now, 8, "8")
sql insert into db.ctb9 values(now, 9, "9")
sql flush database db;
print =============== step3: create dnodes
sql create dnode $hostname port 7300
......
......@@ -834,4 +834,57 @@ endi
print ====== test _wstart end
print insert into ts1 values(-1648791211000,1,2,3)
sql create database test7 vgroups 1;
sql use test7;
sql create stable st(ts timestamp, a int, b int , c int) tags(ta int,tb int,tc int);
sql create table ts1 using st tags(1,1,1);
sql create stream streams7 trigger at_once into streamt7 as select _wstart, count(*) from ts1 interval(10s) ;
sql insert into ts1 values(1648791211000,1,2,3);
sql_error insert into ts1 values(-1648791211000,1,2,3);
loop18:
sleep 200
sql select * from streamt7;
$loop_count = $loop_count + 1
if $loop_count == 10 then
return -1
endi
if $rows != 1 then
print =====rows=$rows
goto loop18
endi
if $data01 != 1 then
print =====data01=$data01
goto loop18
endi
sql_error insert into ts1 values(-1648791211001,1,2,3) (1648791211001,1,2,3);
sql select _wstart, count(*) from ts1 interval(10s) ;
print $data00 $data01
print $data10 $data11
loop19:
sleep 200
sql select * from streamt7;
$loop_count = $loop_count + 1
if $loop_count == 10 then
return -1
endi
if $rows != 1 then
print =====rows=$rows
goto loop19
endi
system sh/exec.sh -n dnode1 -s stop -x SIGINT
......@@ -29,69 +29,119 @@ sql insert into t1 values(1648791223001,2,2,3,1.1);
sql insert into t1 values(1648791223002,2,2,3,1.1);
sql insert into t1 values(1648791223003,2,2,3,1.1);
sql insert into t1 values(1648791223001,2,2,3,1.1);
print step 0
$loop_count = 0
loop0:
sleep 300
$loop_count = $loop_count + 1
if $loop_count == 10 then
return -1
endi
sql select * from streamt;
if $rows != 1 then
print ======$rows
return -1
goto loop0
endi
if $data01 != 1 then
print ======$data01
return -1
goto loop0
endi
sql insert into t1 values(1648791233001,2,2,3,1.1);
print step 1
$loop_count = 0
loop1:
sleep 300
$loop_count = $loop_count + 1
if $loop_count == 10 then
return -1
endi
sql select * from streamt;
if $rows != 2 then
print ======$rows
return -1
goto loop1
endi
if $data01 != 1 then
print ======$data01
return -1
goto loop1
endi
if $data11 != 3 then
print ======$data11
return -1
goto loop1
endi
sql insert into t1 values(1648791223004,2,2,3,1.1);
sql insert into t1 values(1648791223004,2,2,3,1.1);
sql insert into t1 values(1648791223005,2,2,3,1.1);
print step 2
$loop_count = 0
loop2:
sleep 300
$loop_count = $loop_count + 1
if $loop_count == 10 then
return -1
endi
sql select * from streamt;
if $rows != 2 then
print ======$rows
return -1
goto loop2
endi
if $data01 != 1 then
print ======$data01
return -1
goto loop2
endi
if $data11 != 5 then
print ======$data11
return -1
goto loop2
endi
sql insert into t1 values(1648791233002,3,2,3,2.1);
sql insert into t1 values(1648791213002,4,2,3,3.1)
sql insert into t1 values(1648791213002,4,2,3,4.1);
print step 3
$loop_count = 0
loop3:
sleep 300
$loop_count = $loop_count + 1
if $loop_count == 10 then
return -1
endi
sql select * from streamt;
if $rows != 2 then
print ======$rows
return -1
goto loop3
endi
if $data01 != 2 then
print ======$data01
return -1
goto loop3
endi
if $data11 != 5 then
print ======$data11
return -1
goto loop3
endi
system sh/exec.sh -n dnode1 -s stop -x SIGINT
\ No newline at end of file
......@@ -206,7 +206,7 @@ class TDTestCase:
paraDict['rowsPerTbl'] = self.rowsPerTbl
consumerId = 1
if self.snapshot == 0:
expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (2))
expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (1/2))
elif self.snapshot == 1:
expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (1))
......
......@@ -213,9 +213,9 @@ class TDTestCase:
paraDict['rowsPerTbl'] = self.rowsPerTbl
consumerId = 1
if self.snapshot == 0:
expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (2 + 1/2*1/2*2 + 1/2*1/2))
expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (1/2) * (1/2*3))
elif self.snapshot == 1:
expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (2 + 1/2*1/2))
expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (1 + 1/2))
topicList = topicFromStb1
ifcheckdata = 1
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册