diff --git a/cmake/taosadapter_CMakeLists.txt.in b/cmake/taosadapter_CMakeLists.txt.in index 13b247770ea7eef6b64209ca98787ff6d733bf85..d1560574593f613d2f9cf5f486a22d24c32764f9 100644 --- a/cmake/taosadapter_CMakeLists.txt.in +++ b/cmake/taosadapter_CMakeLists.txt.in @@ -2,7 +2,7 @@ # taosadapter ExternalProject_Add(taosadapter GIT_REPOSITORY https://github.com/taosdata/taosadapter.git - GIT_TAG 213f8b3 + GIT_TAG 3e08996 SOURCE_DIR "${TD_SOURCE_DIR}/tools/taosadapter" BINARY_DIR "" #BUILD_IN_SOURCE TRUE diff --git a/cmake/taostools_CMakeLists.txt.in b/cmake/taostools_CMakeLists.txt.in index 13a81f88eab42c64be7ea0cf759da21ddce7a456..926d0c63e7f3b5df80aeac05afba52530e9d77bf 100644 --- a/cmake/taostools_CMakeLists.txt.in +++ b/cmake/taostools_CMakeLists.txt.in @@ -2,7 +2,7 @@ # taos-tools ExternalProject_Add(taos-tools GIT_REPOSITORY https://github.com/taosdata/taos-tools.git - GIT_TAG 0cd564a + GIT_TAG 181bcac SOURCE_DIR "${TD_SOURCE_DIR}/tools/taos-tools" BINARY_DIR "" #BUILD_IN_SOURCE TRUE diff --git a/docs/en/12-taos-sql/02-database.md b/docs/en/12-taos-sql/02-database.md index 476df0a60d593f5215432c08ba4d7560021693e6..059f124ea5b5f380cd72ba55a20b6cf3a80b035e 100644 --- a/docs/en/12-taos-sql/02-database.md +++ b/docs/en/12-taos-sql/02-database.md @@ -58,7 +58,7 @@ database_option: { - WAL_FSYNC_PERIOD: specifies the interval (in milliseconds) at which data is written from the WAL to disk. This parameter takes effect only when the WAL parameter is set to 2. The default value is 3000. Enter a value between 0 and 180000. The value 0 indicates that incoming data is immediately written to disk. - MAXROWS: specifies the maximum number of rows recorded in a block. The default value is 4096. - MINROWS: specifies the minimum number of rows recorded in a block. The default value is 100. -- KEEP: specifies the time for which data is retained. Enter a value between 1 and 365000. The default value is 3650. The value of the KEEP parameter must be greater than or equal to the value of the DURATION parameter. TDengine automatically deletes data that is older than the value of the KEEP parameter. You can use m (minutes), h (hours), and d (days) as the unit, for example KEEP 100h or KEEP 10d. If you do not include a unit, d is used by default. +- KEEP: specifies the time for which data is retained. Enter a value between 1 and 365000. The default value is 3650. The value of the KEEP parameter must be greater than or equal to the value of the DURATION parameter. TDengine automatically deletes data that is older than the value of the KEEP parameter. You can use m (minutes), h (hours), and d (days) as the unit, for example KEEP 100h or KEEP 10d. If you do not include a unit, d is used by default. The Enterprise Edition supports [Tiered Storage](https://docs.tdengine.com/tdinternal/arch/#tiered-storage) function, thus multiple KEEP values (comma separated and up to 3 values supported, and meet keep 0 <= keep 1 <= keep 2, e.g. KEEP 100h,100d,3650d) are supported; the Community Edition does not support Tiered Storage function (although multiple keep values are configured, they do not take effect, only the maximum keep value is used as KEEP). - PAGES: specifies the number of pages in the metadata storage engine cache on each vnode. Enter a value greater than or equal to 64. The default value is 256. The space occupied by metadata storage on each vnode is equal to the product of the values of the PAGESIZE and PAGES parameters. The space occupied by default is 1 MB. - PAGESIZE: specifies the size (in KB) of each page in the metadata storage engine cache on each vnode. The default value is 4. Enter a value between 1 and 16384. - PRECISION: specifies the precision at which a database records timestamps. Enter ms for milliseconds, us for microseconds, or ns for nanoseconds. The default value is ms. diff --git a/docs/en/12-taos-sql/24-show.md b/docs/en/12-taos-sql/24-show.md index 2db3e7cb31463e20f024f48e62d06422519ba0e7..f70d86570e5fe9cf4f9eb6e58dd1908c62adcc89 100644 --- a/docs/en/12-taos-sql/24-show.md +++ b/docs/en/12-taos-sql/24-show.md @@ -363,7 +363,7 @@ Shows information about all vgroups in the system or about the vgroups for a spe ## SHOW VNODES ```sql -SHOW VNODES [dnode_name]; +SHOW VNODES {dnode_id | dnode_endpoint}; ``` Shows information about all vnodes in the system or about the vnodes for a specified dnode. diff --git a/docs/en/14-reference/12-config/index.md b/docs/en/14-reference/12-config/index.md index a8f4978abfbdc3fbdf98714ac1ad41d8245ee94e..9e56a0b0bff931c3b10103c5d63f9134baf280a1 100644 --- a/docs/en/14-reference/12-config/index.md +++ b/docs/en/14-reference/12-config/index.md @@ -323,6 +323,7 @@ The charset that takes effect is UTF-8. | Applicable | Server Only | | Meaning | All data files are stored in this directory | | Default Value | /var/lib/taos | +| Note | The [Tiered Storage](https://docs.tdengine.com/tdinternal/arch/#tiered-storage) function needs to be used in conjunction with the [KEEP](https://docs.tdengine.com/taos-sql/database/#parameters) parameter | ### tempDir diff --git a/docs/examples/go/go.sum b/docs/examples/go/go.sum new file mode 100644 index 0000000000000000000000000000000000000000..13e13adaa189053696320a6eb9740daa319a98b7 --- /dev/null +++ b/docs/examples/go/go.sum @@ -0,0 +1,15 @@ +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/taosdata/driver-go/v3 v3.1.0/go.mod h1:H2vo/At+rOPY1aMzUV9P49SVX7NlXb3LAbKw+MCLrmU= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/docs/examples/python/conn_native_pandas.py b/docs/examples/python/conn_native_pandas.py index 56942ef57085766cd128b03cabb7a357587eab16..f3bab15efbe6669a88828fb194682dbfedb382df 100644 --- a/docs/examples/python/conn_native_pandas.py +++ b/docs/examples/python/conn_native_pandas.py @@ -1,8 +1,11 @@ import pandas -from sqlalchemy import create_engine +from sqlalchemy import create_engine, text engine = create_engine("taos://root:taosdata@localhost:6030/power") -df = pandas.read_sql("SELECT * FROM meters", engine) +conn = engine.connect() +df = pandas.read_sql(text("SELECT * FROM power.meters"), conn) +conn.close() + # print index print(df.index) diff --git a/docs/examples/python/conn_rest_pandas.py b/docs/examples/python/conn_rest_pandas.py index 0164080cd5a05e72dce40b1d111ea423623ff9b2..1b207d6ff10a353f3473116ce807cc8daf362ca7 100644 --- a/docs/examples/python/conn_rest_pandas.py +++ b/docs/examples/python/conn_rest_pandas.py @@ -1,8 +1,10 @@ import pandas -from sqlalchemy import create_engine +from sqlalchemy import create_engine, text engine = create_engine("taosrest://root:taosdata@localhost:6041") -df: pandas.DataFrame = pandas.read_sql("SELECT * FROM power.meters", engine) +conn = engine.connect() +df: pandas.DataFrame = pandas.read_sql(text("SELECT * FROM power.meters"), conn) +conn.close() # print index print(df.index) diff --git a/docs/examples/python/connect_rest_examples.py b/docs/examples/python/connect_rest_examples.py index dba00b5a8279a3cbb3cab0a2d8b26bb312364479..0f8625ae5387a275f7b84948ad80191b8e443862 100644 --- a/docs/examples/python/connect_rest_examples.py +++ b/docs/examples/python/connect_rest_examples.py @@ -1,18 +1,19 @@ # ANCHOR: connect from taosrest import connect, TaosRestConnection, TaosRestCursor -conn: TaosRestConnection = connect(url="http://localhost:6041", - user="root", - password="taosdata", - timeout=30) +conn = connect(url="http://localhost:6041", + user="root", + password="taosdata", + timeout=30) # ANCHOR_END: connect # ANCHOR: basic # create STable -cursor: TaosRestCursor = conn.cursor() +cursor = conn.cursor() cursor.execute("DROP DATABASE IF EXISTS power") cursor.execute("CREATE DATABASE power") -cursor.execute("CREATE STABLE power.meters (ts TIMESTAMP, current FLOAT, voltage INT, phase FLOAT) TAGS (location BINARY(64), groupId INT)") +cursor.execute( + "CREATE STABLE power.meters (ts TIMESTAMP, current FLOAT, voltage INT, phase FLOAT) TAGS (location BINARY(64), groupId INT)") # insert data cursor.execute("""INSERT INTO power.d1001 USING power.meters TAGS('California.SanFrancisco', 2) VALUES ('2018-10-03 14:38:05.000', 10.30000, 219, 0.31000) ('2018-10-03 14:38:15.000', 12.60000, 218, 0.33000) ('2018-10-03 14:38:16.800', 12.30000, 221, 0.31000) @@ -28,7 +29,7 @@ print("queried row count:", cursor.rowcount) # get column names from cursor column_names = [meta[0] for meta in cursor.description] # get rows -data: list[tuple] = cursor.fetchall() +data = cursor.fetchall() print(column_names) for row in data: print(row) diff --git a/docs/examples/python/connection_usage_native_reference.py b/docs/examples/python/connection_usage_native_reference.py index a7179b4cf859eb440b535a797eeb8e2be1e33589..8b754ec7226e8fd25dbdeb27b28faebdcf612049 100644 --- a/docs/examples/python/connection_usage_native_reference.py +++ b/docs/examples/python/connection_usage_native_reference.py @@ -8,7 +8,7 @@ conn.execute("CREATE DATABASE test") # change database. same as execute "USE db" conn.select_db("test") conn.execute("CREATE STABLE weather(ts TIMESTAMP, temperature FLOAT) TAGS (location INT)") -affected_row: int = conn.execute("INSERT INTO t1 USING weather TAGS(1) VALUES (now, 23.5) (now+1m, 23.5) (now+2m, 24.4)") +affected_row = conn.execute("INSERT INTO t1 USING weather TAGS(1) VALUES (now, 23.5) (now+1m, 23.5) (now+2m, 24.4)") print("affected_row", affected_row) # output: # affected_row 3 @@ -16,10 +16,10 @@ print("affected_row", affected_row) # ANCHOR: query # Execute a sql and get its result set. It's useful for SELECT statement -result: taos.TaosResult = conn.query("SELECT * from weather") +result = conn.query("SELECT * from weather") # Get fields from result -fields: taos.field.TaosFields = result.fields +fields = result.fields for field in fields: print(field) # {name: ts, type: 9, bytes: 8} diff --git a/docs/examples/python/fast_write_example.py b/docs/examples/python/fast_write_example.py index c9d606388fdecd85f1468f24cc497ecc5941f035..626e3310b120b9415952614b4b110ed29f787582 100644 --- a/docs/examples/python/fast_write_example.py +++ b/docs/examples/python/fast_write_example.py @@ -1,15 +1,14 @@ # install dependencies: # recommend python >= 3.8 -# pip3 install faster-fifo # import logging import math +import multiprocessing import sys import time import os -from multiprocessing import Process -from faster_fifo import Queue +from multiprocessing import Process, Queue from mockdatasource import MockDataSource from queue import Empty from typing import List @@ -22,8 +21,7 @@ TABLE_COUNT = 1000 QUEUE_SIZE = 1000000 MAX_BATCH_SIZE = 3000 -read_processes = [] -write_processes = [] +_DONE_MESSAGE = '__DONE__' def get_connection(): @@ -44,41 +42,64 @@ def get_connection(): # ANCHOR: read -def run_read_task(task_id: int, task_queues: List[Queue]): +def run_read_task(task_id: int, task_queues: List[Queue], infinity): table_count_per_task = TABLE_COUNT // READ_TASK_COUNT - data_source = MockDataSource(f"tb{task_id}", table_count_per_task) + data_source = MockDataSource(f"tb{task_id}", table_count_per_task, infinity) try: for batch in data_source: + if isinstance(batch, tuple): + batch = [batch] for table_id, rows in batch: # hash data to different queue i = table_id % len(task_queues) # block putting forever when the queue is full - task_queues[i].put_many(rows, block=True, timeout=-1) + for row in rows: + task_queues[i].put(row) + if not infinity: + for queue in task_queues: + queue.put(_DONE_MESSAGE) except KeyboardInterrupt: pass + finally: + logging.info('read task over') # ANCHOR_END: read + # ANCHOR: write -def run_write_task(task_id: int, queue: Queue): +def run_write_task(task_id: int, queue: Queue, done_queue: Queue): from sql_writer import SQLWriter log = logging.getLogger(f"WriteTask-{task_id}") writer = SQLWriter(get_connection) lines = None try: while True: - try: - # get as many as possible - lines = queue.get_many(block=False, max_messages_to_get=MAX_BATCH_SIZE) + over = False + lines = [] + for _ in range(MAX_BATCH_SIZE): + try: + line = queue.get_nowait() + if line == _DONE_MESSAGE: + over = True + break + if line: + lines.append(line) + except Empty: + time.sleep(0.1) + if len(lines) > 0: writer.process_lines(lines) - except Empty: - time.sleep(0.01) + if over: + done_queue.put(_DONE_MESSAGE) + break except KeyboardInterrupt: pass except BaseException as e: log.debug(f"lines={lines}") raise e + finally: + writer.close() + log.debug('write task over') # ANCHOR_END: write @@ -103,47 +124,64 @@ def set_global_config(): # ANCHOR: monitor -def run_monitor_process(): +def run_monitor_process(done_queue: Queue): log = logging.getLogger("DataBaseMonitor") - conn = get_connection() - conn.execute("DROP DATABASE IF EXISTS test") - conn.execute("CREATE DATABASE test") - conn.execute("CREATE STABLE test.meters (ts TIMESTAMP, current FLOAT, voltage INT, phase FLOAT) " - "TAGS (location BINARY(64), groupId INT)") + conn = None + try: + conn = get_connection() - def get_count(): - res = conn.query("SELECT count(*) FROM test.meters") - rows = res.fetch_all() - return rows[0][0] if rows else 0 + def get_count(): + res = conn.query("SELECT count(*) FROM test.meters") + rows = res.fetch_all() + return rows[0][0] if rows else 0 - last_count = 0 - while True: - time.sleep(10) - count = get_count() - log.info(f"count={count} speed={(count - last_count) / 10}") - last_count = count + last_count = 0 + while True: + try: + done = done_queue.get_nowait() + if done == _DONE_MESSAGE: + break + except Empty: + pass + time.sleep(10) + count = get_count() + log.info(f"count={count} speed={(count - last_count) / 10}") + last_count = count + finally: + conn.close() # ANCHOR_END: monitor # ANCHOR: main -def main(): +def main(infinity): set_global_config() logging.info(f"READ_TASK_COUNT={READ_TASK_COUNT}, WRITE_TASK_COUNT={WRITE_TASK_COUNT}, " f"TABLE_COUNT={TABLE_COUNT}, QUEUE_SIZE={QUEUE_SIZE}, MAX_BATCH_SIZE={MAX_BATCH_SIZE}") - monitor_process = Process(target=run_monitor_process) + conn = get_connection() + conn.execute("DROP DATABASE IF EXISTS test") + conn.execute("CREATE DATABASE IF NOT EXISTS test") + conn.execute("CREATE STABLE IF NOT EXISTS test.meters (ts TIMESTAMP, current FLOAT, voltage INT, phase FLOAT) " + "TAGS (location BINARY(64), groupId INT)") + conn.close() + + done_queue = Queue() + monitor_process = Process(target=run_monitor_process, args=(done_queue,)) monitor_process.start() - time.sleep(3) # waiting for database ready. + logging.debug(f"monitor task started with pid {monitor_process.pid}") task_queues: List[Queue] = [] + write_processes = [] + read_processes = [] + # create task queues for i in range(WRITE_TASK_COUNT): - queue = Queue(max_size_bytes=QUEUE_SIZE) + queue = Queue() task_queues.append(queue) # create write processes for i in range(WRITE_TASK_COUNT): - p = Process(target=run_write_task, args=(i, task_queues[i])) + p = Process(target=run_write_task, args=(i, task_queues[i], done_queue)) p.start() logging.debug(f"WriteTask-{i} started with pid {p.pid}") write_processes.append(p) @@ -151,13 +189,19 @@ def main(): # create read processes for i in range(READ_TASK_COUNT): queues = assign_queues(i, task_queues) - p = Process(target=run_read_task, args=(i, queues)) + p = Process(target=run_read_task, args=(i, queues, infinity)) p.start() logging.debug(f"ReadTask-{i} started with pid {p.pid}") read_processes.append(p) try: monitor_process.join() + for p in read_processes: + p.join() + for p in write_processes: + p.join() + time.sleep(1) + return except KeyboardInterrupt: monitor_process.terminate() [p.terminate() for p in read_processes] @@ -176,5 +220,6 @@ def assign_queues(read_task_id, task_queues): if __name__ == '__main__': - main() + multiprocessing.set_start_method('spawn') + main(False) # ANCHOR_END: main diff --git a/docs/examples/python/kafka_example.py b/docs/examples/python/kafka_example.py index 735059eec0f3dcf5094810916e66a39db5682560..43f9183f7e25b680827aef836363ef5f0549468b 100644 --- a/docs/examples/python/kafka_example.py +++ b/docs/examples/python/kafka_example.py @@ -26,7 +26,8 @@ class Consumer(object): 'bath_consume': True, 'batch_size': 1000, 'async_model': True, - 'workers': 10 + 'workers': 10, + 'testing': False } LOCATIONS = ['California.SanFrancisco', 'California.LosAngles', 'California.SanDiego', 'California.SanJose', @@ -46,11 +47,12 @@ class Consumer(object): def __init__(self, **configs): self.config: dict = self.DEFAULT_CONFIGS self.config.update(configs) - self.consumer = KafkaConsumer( - self.config.get('kafka_topic'), # topic - bootstrap_servers=self.config.get('kafka_brokers'), - group_id=self.config.get('kafka_group_id'), - ) + if not self.config.get('testing'): + self.consumer = KafkaConsumer( + self.config.get('kafka_topic'), # topic + bootstrap_servers=self.config.get('kafka_brokers'), + group_id=self.config.get('kafka_group_id'), + ) self.taos = taos.connect( host=self.config.get('taos_host'), user=self.config.get('taos_user'), @@ -60,7 +62,7 @@ class Consumer(object): ) if self.config.get('async_model'): self.pool = ThreadPoolExecutor(max_workers=self.config.get('workers')) - self.tasks: list[Future] = [] + self.tasks = [] # tags and table mapping # key: {location}_{groupId} value: self.tag_table_mapping = {} i = 0 @@ -115,14 +117,14 @@ class Consumer(object): if self.taos is not None: self.taos.close() - def _run(self, f: Callable[[ConsumerRecord], bool]): + def _run(self, f): for message in self.consumer: if self.config.get('async_model'): self.pool.submit(f(message)) else: f(message) - def _run_batch(self, f: Callable[[list[list[ConsumerRecord]]], None]): + def _run_batch(self, f): while True: messages = self.consumer.poll(timeout_ms=500, max_records=self.config.get('batch_size')) if messages: @@ -140,7 +142,7 @@ class Consumer(object): logging.info('## insert sql %s', sql) return self.taos.execute(sql=sql) == 1 - def _to_taos_batch(self, messages: list[list[ConsumerRecord]]): + def _to_taos_batch(self, messages): sql = self._build_sql_batch(messages=messages) if len(sql) == 0: # decode error, skip return @@ -162,7 +164,7 @@ class Consumer(object): table_name = self._get_table_name(location=location, group_id=group_id) return self.INSERT_PART_SQL.format(table_name, ts, current, voltage, phase) - def _build_sql_batch(self, messages: list[list[ConsumerRecord]]) -> str: + def _build_sql_batch(self, messages) -> str: sql_list = [] for partition_messages in messages: for message in partition_messages: @@ -186,7 +188,54 @@ def _get_location_and_group(key: str) -> (str, int): return fields[0], fields[1] +def test_to_taos(consumer: Consumer): + msg = { + 'location': 'California.SanFrancisco', + 'groupId': 1, + 'ts': '2022-12-06 15:13:38.643', + 'current': 3.41, + 'voltage': 105, + 'phase': 0.02027, + } + record = ConsumerRecord(checksum=None, headers=None, offset=1, key=None, value=json.dumps(msg), partition=1, + topic='test', serialized_key_size=None, serialized_header_size=None, + serialized_value_size=None, timestamp=time.time(), timestamp_type=None) + assert consumer._to_taos(message=record) + + +def test_to_taos_batch(consumer: Consumer): + records = [ + [ + ConsumerRecord(checksum=None, headers=None, offset=1, key=None, + value=json.dumps({'location': 'California.SanFrancisco', + 'groupId': 1, + 'ts': '2022-12-06 15:13:38.643', + 'current': 3.41, + 'voltage': 105, + 'phase': 0.02027, }), + partition=1, topic='test', serialized_key_size=None, serialized_header_size=None, + serialized_value_size=None, timestamp=time.time(), timestamp_type=None), + ConsumerRecord(checksum=None, headers=None, offset=1, key=None, + value=json.dumps({'location': 'California.LosAngles', + 'groupId': 2, + 'ts': '2022-12-06 15:13:39.643', + 'current': 3.41, + 'voltage': 102, + 'phase': 0.02027, }), + partition=1, topic='test', serialized_key_size=None, serialized_header_size=None, + serialized_value_size=None, timestamp=time.time(), timestamp_type=None), + ] + ] + + consumer._to_taos_batch(messages=records) + + if __name__ == '__main__': - consumer = Consumer(async_model=True) + consumer = Consumer(async_model=True, testing=True) + # init env consumer.init_env() - consumer.consume() \ No newline at end of file + # consumer.consume() + # test build sql + # test build sql batch + test_to_taos(consumer) + test_to_taos_batch(consumer) diff --git a/docs/examples/python/mockdatasource.py b/docs/examples/python/mockdatasource.py index 1c516a800e007934f8e6815f82024a53fea70073..9c702936ea6f1bdff3f604d376fd1925b4dc118e 100644 --- a/docs/examples/python/mockdatasource.py +++ b/docs/examples/python/mockdatasource.py @@ -10,13 +10,14 @@ class MockDataSource: "9.4,118,0.141,California.SanFrancisco,4" ] - def __init__(self, tb_name_prefix, table_count): + def __init__(self, tb_name_prefix, table_count, infinity=True): self.table_name_prefix = tb_name_prefix + "_" self.table_count = table_count self.max_rows = 10000000 self.current_ts = round(time.time() * 1000) - self.max_rows * 100 # [(tableId, tableName, values),] self.data = self._init_data() + self.infinity = infinity def _init_data(self): lines = self.samples * (self.table_count // 5 + 1) @@ -28,14 +29,19 @@ class MockDataSource: def __iter__(self): self.row = 0 - return self + if not self.infinity: + return iter(self._iter_data()) + else: + return self def __next__(self): """ next 1000 rows for each table. return: {tableId:[row,...]} """ - # generate 1000 timestamps + return self._iter_data() + + def _iter_data(self): ts = [] for _ in range(1000): self.current_ts += 100 @@ -47,3 +53,9 @@ class MockDataSource: rows = [table_name + ',' + t + ',' + values for t in ts] result.append((table_id, rows)) return result + + +if __name__ == '__main__': + datasource = MockDataSource('t', 10, False) + for data in datasource: + print(data) diff --git a/docs/examples/python/sql_writer.py b/docs/examples/python/sql_writer.py index 758167376b009f21afc701be7d89c1bfbabdeb9f..3456981a7b9a174e38f8795ff7251ab3c675174b 100644 --- a/docs/examples/python/sql_writer.py +++ b/docs/examples/python/sql_writer.py @@ -10,6 +10,7 @@ class SQLWriter: self._tb_tags = {} self._conn = get_connection_func() self._max_sql_length = self.get_max_sql_length() + self._conn.execute("create database if not exists test") self._conn.execute("USE test") def get_max_sql_length(self): @@ -20,7 +21,7 @@ class SQLWriter: return int(r[1]) return 1024 * 1024 - def process_lines(self, lines: str): + def process_lines(self, lines: [str]): """ :param lines: [[tbName,ts,current,voltage,phase,location,groupId]] """ @@ -60,6 +61,7 @@ class SQLWriter: buf.append(q) sql_len += len(q) sql += " ".join(buf) + self.create_tables() self.execute_sql(sql) self._tb_values.clear() @@ -88,3 +90,22 @@ class SQLWriter: except BaseException as e: self.log.error("Execute SQL: %s", sql) raise e + + def close(self): + if self._conn: + self._conn.close() + + +if __name__ == '__main__': + def get_connection_func(): + conn = taos.connect() + return conn + + + writer = SQLWriter(get_connection_func=get_connection_func) + writer.execute_sql( + "create stable if not exists meters (ts timestamp, current float, voltage int, phase float) " + "tags (location binary(64), groupId int)") + writer.execute_sql( + "INSERT INTO d21001 USING meters TAGS ('California.SanFrancisco', 2) " + "VALUES ('2021-07-13 14:06:32.272', 10.2, 219, 0.32)") diff --git a/docs/examples/python/tmq_example.py b/docs/examples/python/tmq_example.py index fafa81e8b552b32a47c9e96833ec6d5959dc538a..6f7fb87c89ce4cb96793d09a837f60ad54ae69bc 100644 --- a/docs/examples/python/tmq_example.py +++ b/docs/examples/python/tmq_example.py @@ -19,8 +19,14 @@ def init_tmq_env(db, topic): conn.execute("insert into tb3 values (now, 3, 3.0, 'tmq test')") +def cleanup(db, topic): + conn = taos.connect() + conn.execute("drop topic if exists {}".format(topic)) + conn.execute("drop database if exists {}".format(db)) + + if __name__ == '__main__': - init_tmq_env("tmq_test", "tmq_test_topic") # init env + init_tmq_env("tmq_test", "tmq_test_topic") # init env consumer = Consumer( { "group.id": "tg2", @@ -33,9 +39,9 @@ if __name__ == '__main__': try: while True: - res = consumer.poll(100) + res = consumer.poll(1) if not res: - continue + break err = res.error() if err is not None: raise err @@ -46,3 +52,4 @@ if __name__ == '__main__': finally: consumer.unsubscribe() consumer.close() + cleanup("tmq_test", "tmq_test_topic") diff --git a/docs/zh/12-taos-sql/02-database.md b/docs/zh/12-taos-sql/02-database.md index 7d9566f4f257ed7a0ffdcdfb06d0d7116bf24043..fc35da863676943e152d5c113b70e5b5bd6b566e 100644 --- a/docs/zh/12-taos-sql/02-database.md +++ b/docs/zh/12-taos-sql/02-database.md @@ -58,7 +58,7 @@ database_option: { - WAL_FSYNC_PERIOD:当 WAL 参数设置为 2 时,落盘的周期。默认为 3000,单位毫秒。最小为 0,表示每次写入立即落盘;最大为 180000,即三分钟。 - MAXROWS:文件块中记录的最大条数,默认为 4096 条。 - MINROWS:文件块中记录的最小条数,默认为 100 条。 -- KEEP:表示数据文件保存的天数,缺省值为 3650,取值范围 [1, 365000],且必须大于或等于 DURATION 参数值。数据库会自动删除保存时间超过 KEEP 值的数据。KEEP 可以使用加单位的表示形式,如 KEEP 100h、KEEP 10d 等,支持 m(分钟)、h(小时)和 d(天)三个单位。也可以不写单位,如 KEEP 50,此时默认单位为天。 +- KEEP:表示数据文件保存的天数,缺省值为 3650,取值范围 [1, 365000],且必须大于或等于 DURATION 参数值。数据库会自动删除保存时间超过 KEEP 值的数据。KEEP 可以使用加单位的表示形式,如 KEEP 100h、KEEP 10d 等,支持 m(分钟)、h(小时)和 d(天)三个单位。也可以不写单位,如 KEEP 50,此时默认单位为天。企业版支持[多级存储](https://docs.taosdata.com/tdinternal/arch/#%E5%A4%9A%E7%BA%A7%E5%AD%98%E5%82%A8)功能, 因此, 可以设置多个保存时间(多个以英文逗号分隔,最多 3 个,满足 keep 0 <= keep 1 <= keep 2,如 KEEP 100h,100d,3650d); 社区版不支持多级存储功能(即使配置了多个保存时间, 也不会生效, KEEP 会取最大的保存时间)。 - PAGES:一个 VNODE 中元数据存储引擎的缓存页个数,默认为 256,最小 64。一个 VNODE 元数据存储占用 PAGESIZE \* PAGES,默认情况下为 1MB 内存。 - PAGESIZE:一个 VNODE 中元数据存储引擎的页大小,单位为 KB,默认为 4 KB。范围为 1 到 16384,即 1 KB 到 16 MB。 - PRECISION:数据库的时间戳精度。ms 表示毫秒,us 表示微秒,ns 表示纳秒,默认 ms 毫秒。 diff --git a/docs/zh/12-taos-sql/24-show.md b/docs/zh/12-taos-sql/24-show.md index 2b875199b501136afef4ce0ba1cbcdd1e0abc933..0a326729f204f95f61b7f9fd1c62c8aa180359bd 100644 --- a/docs/zh/12-taos-sql/24-show.md +++ b/docs/zh/12-taos-sql/24-show.md @@ -306,7 +306,7 @@ SHOW [db_name.]VGROUPS; ## SHOW VNODES ```sql -SHOW VNODES [dnode_name]; +SHOW VNODES {dnode_id | dnode_endpoint}; ``` 显示当前系统中所有 VNODE 或某个 DNODE 的 VNODE 的信息。 diff --git a/docs/zh/14-reference/12-config/index.md b/docs/zh/14-reference/12-config/index.md index 3b9dbabd49783efa3904edc9e4e799bf1843bb1d..503f6927648ce77eedc6c2a5d160a0d997bf00d7 100644 --- a/docs/zh/14-reference/12-config/index.md +++ b/docs/zh/14-reference/12-config/index.md @@ -323,6 +323,7 @@ charset 的有效值是 UTF-8。 | 适用范围 | 仅服务端适用 | | 含义 | 数据文件目录,所有的数据文件都将写入该目录 | | 缺省值 | /var/lib/taos | +| 补充说明 | [多级存储](https://docs.taosdata.com/tdinternal/arch/#%E5%A4%9A%E7%BA%A7%E5%AD%98%E5%82%A8) 功能需要与 [KEEP](https://docs.taosdata.com/taos-sql/database/#%E5%8F%82%E6%95%B0%E8%AF%B4%E6%98%8E) 参数配合使用 | ### tempDir diff --git a/include/common/tdataformat.h b/include/common/tdataformat.h index e0aacbfec9db8804b4003417689f404b78549afb..b5971d8a9e0c3318aadc4c0f697965c1e25b28d5 100644 --- a/include/common/tdataformat.h +++ b/include/common/tdataformat.h @@ -146,9 +146,9 @@ extern void (*tColDataCalcSMA[])(SColData *pColData, int64_t *sum, int64_t *max, int32_t tColDataAddValueByBind(SColData *pColData, TAOS_MULTI_BIND *pBind); void tColDataSortMerge(SArray *colDataArr); -//for raw block -int32_t tColDataAddValueByDataBlock(SColData *pColData, int8_t type, int32_t bytes, - int32_t nRows, char* lengthOrbitmap, char *data); +// for raw block +int32_t tColDataAddValueByDataBlock(SColData *pColData, int8_t type, int32_t bytes, int32_t nRows, char *lengthOrbitmap, + char *data); // for encode/decode int32_t tPutColData(uint8_t *pBuf, SColData *pColData); int32_t tGetColData(uint8_t *pBuf, SColData *pColData); @@ -261,7 +261,13 @@ struct STag { // STSchema ================================ STSchema *tBuildTSchema(SSchema *aSchema, int32_t numOfCols, int32_t version); -void tDestroyTSchema(STSchema *pTSchema); +#define tDestroyTSchema(pTSchema) \ + do { \ + if (pTSchema) { \ + taosMemoryFree(pTSchema); \ + pTSchema = NULL; \ + } \ + } while (0) #endif diff --git a/include/common/tmsg.h b/include/common/tmsg.h index c5e9a1275610ea61ca96f343ff2000902f7290df..caf67ee3a99cf054a7b6add325fbd1af7b5e8b3f 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -914,6 +914,7 @@ typedef struct { int32_t numOfRetensions; SArray* pRetensions; int8_t schemaless; + int16_t sstTrigger; } SDbCfgRsp; int32_t tSerializeSDbCfgRsp(void* buf, int32_t bufLen, const SDbCfgRsp* pRsp); diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 0f57da2b804c7c58411ee7ffa2cbb1fc4d68b3e8..1c52d7ea5df4904938201ee379c7b1068b25c0a0 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -370,7 +370,8 @@ int32_t tDecodeSStreamTask(SDecoder* pDecoder, SStreamTask* pTask); void tFreeSStreamTask(SStreamTask* pTask); static FORCE_INLINE int32_t streamTaskInput(SStreamTask* pTask, SStreamQueueItem* pItem) { - if (pItem->type == STREAM_INPUT__DATA_SUBMIT) { + int8_t type = pItem->type; + if (type == STREAM_INPUT__DATA_SUBMIT) { SStreamDataSubmit2* pSubmitClone = streamSubmitRefClone((SStreamDataSubmit2*)pItem); if (pSubmitClone == NULL) { qDebug("task %d %p submit enqueue failed since out of memory", pTask->taskId, pTask); @@ -382,19 +383,19 @@ static FORCE_INLINE int32_t streamTaskInput(SStreamTask* pTask, SStreamQueueItem pSubmitClone->submit.msgStr, pSubmitClone->submit.msgLen, pSubmitClone->submit.ver); taosWriteQitem(pTask->inputQueue->queue, pSubmitClone); // qStreamInput(pTask->exec.executor, pSubmitClone); - } else if (pItem->type == STREAM_INPUT__DATA_BLOCK || pItem->type == STREAM_INPUT__DATA_RETRIEVE || - pItem->type == STREAM_INPUT__REF_DATA_BLOCK) { + } else if (type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__DATA_RETRIEVE || + type == STREAM_INPUT__REF_DATA_BLOCK) { taosWriteQitem(pTask->inputQueue->queue, pItem); // qStreamInput(pTask->exec.executor, pItem); - } else if (pItem->type == STREAM_INPUT__CHECKPOINT) { + } else if (type == STREAM_INPUT__CHECKPOINT) { taosWriteQitem(pTask->inputQueue->queue, pItem); // qStreamInput(pTask->exec.executor, pItem); - } else if (pItem->type == STREAM_INPUT__GET_RES) { + } else if (type == STREAM_INPUT__GET_RES) { taosWriteQitem(pTask->inputQueue->queue, pItem); // qStreamInput(pTask->exec.executor, pItem); } - if (pItem->type != STREAM_INPUT__GET_RES && pItem->type != STREAM_INPUT__CHECKPOINT && pTask->triggerParam != 0) { + if (type != STREAM_INPUT__GET_RES && type != STREAM_INPUT__CHECKPOINT && pTask->triggerParam != 0) { atomic_val_compare_exchange_8(&pTask->triggerStatus, TASK_TRIGGER_STATUS__INACTIVE, TASK_TRIGGER_STATUS__ACTIVE); } diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index 4c23c1f5575434cc971a49a042df4ce1720b73ef..defafce30eb14e8c2cf6aaa4408199340a9d2adf 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -193,7 +193,7 @@ typedef struct SSyncLogStore { SyncIndex (*syncLogLastIndex)(struct SSyncLogStore* pLogStore); SyncTerm (*syncLogLastTerm)(struct SSyncLogStore* pLogStore); - int32_t (*syncLogAppendEntry)(struct SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry); + int32_t (*syncLogAppendEntry)(struct SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry, bool forcSync); int32_t (*syncLogGetEntry)(struct SSyncLogStore* pLogStore, SyncIndex index, SSyncRaftEntry** ppEntry); int32_t (*syncLogTruncate)(struct SSyncLogStore* pLogStore, SyncIndex fromIndex); diff --git a/include/libs/wal/wal.h b/include/libs/wal/wal.h index a1ae1e429dd5dbb18f6521b263576e7096482327..a0f421212a56603402c61c9bb2763a3d1e7cee1c 100644 --- a/include/libs/wal/wal.h +++ b/include/libs/wal/wal.h @@ -201,6 +201,7 @@ int32_t walFetchHead(SWalReader *pRead, int64_t ver, SWalCkHead *pHead); int32_t walFetchBody(SWalReader *pRead, SWalCkHead **ppHead); int32_t walSkipFetchBody(SWalReader *pRead, const SWalCkHead *pHead); +SWalRef *walRefFirstVer(SWal *, SWalRef *); SWalRef *walRefCommittedVer(SWal *); SWalRef *walOpenRef(SWal *); diff --git a/packaging/tools/install.sh b/packaging/tools/install.sh index 4be179b04d7241895c08b5b1d0e62ebef9603d09..7f95ca3d72000acd97259d934097153be91943cf 100755 --- a/packaging/tools/install.sh +++ b/packaging/tools/install.sh @@ -746,7 +746,7 @@ function is_version_compatible() { deb_erase() { confirm="" while [ "" == "${confirm}" ]; do - echo -e -n "${RED}Exist tdengine deb detected, do you want to remove it? [yes|no] ${NC}:" + echo -e -n "${RED}Existing TDengine deb is detected, do you want to remove it? [yes|no] ${NC}:" read confirm if [ "yes" == "$confirm" ]; then ${csudo}dpkg --remove tdengine ||: @@ -760,7 +760,7 @@ deb_erase() { rpm_erase() { confirm="" while [ "" == "${confirm}" ]; do - echo -e -n "${RED}Exist tdengine rpm detected, do you want to remove it? [yes|no] ${NC}:" + echo -e -n "${RED}Existing TDengine rpm is detected, do you want to remove it? [yes|no] ${NC}:" read confirm if [ "yes" == "$confirm" ]; then ${csudo}rpm -e tdengine ||: diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index ab51a64686ee7c719a89d35df9129f14f988f1de..fbab1ee08b847544c3711d73697636f502c2ce25 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -400,45 +400,6 @@ void destroyRequest(SRequestObj *pRequest) { removeRequest(pRequest->self); } -void taosClientCrash(int signum, void *sigInfo, void *context) { - taosIgnSignal(SIGTERM); - taosIgnSignal(SIGHUP); - taosIgnSignal(SIGINT); - taosIgnSignal(SIGBREAK); - -#if !defined(WINDOWS) - taosIgnSignal(SIGBUS); -#endif - taosIgnSignal(SIGABRT); - taosIgnSignal(SIGFPE); - taosIgnSignal(SIGSEGV); - - char *pMsg = NULL; - const char *flags = "UTL FATAL "; - ELogLevel level = DEBUG_FATAL; - int32_t dflag = 255; - int64_t msgLen= -1; - - if (tsEnableCrashReport) { - if (taosGenCrashJsonMsg(signum, &pMsg, lastClusterId, appInfo.startTime)) { - taosPrintLog(flags, level, dflag, "failed to generate crash json msg"); - goto _return; - } else { - msgLen = strlen(pMsg); - } - } - -_return: - - taosLogCrashInfo("taos", pMsg, msgLen, signum, sigInfo); - -#ifdef _TD_DARWIN_64 - exit(signum); -#elif defined(WINDOWS) - exit(signum); -#endif -} - void crashReportThreadFuncUnexpectedStopped(void) { atomic_store_32(&clientStop, -1); } static void *tscCrashReportThreadFp(void *param) { @@ -535,15 +496,26 @@ void tscStopCrashReport() { } } -static void tscSetSignalHandle() { -#if !defined(WINDOWS) - taosSetSignal(SIGBUS, taosClientCrash); -#endif - taosSetSignal(SIGABRT, taosClientCrash); - taosSetSignal(SIGFPE, taosClientCrash); - taosSetSignal(SIGSEGV, taosClientCrash); + +void tscWriteCrashInfo(int signum, void *sigInfo, void *context) { + char *pMsg = NULL; + const char *flags = "UTL FATAL "; + ELogLevel level = DEBUG_FATAL; + int32_t dflag = 255; + int64_t msgLen= -1; + + if (tsEnableCrashReport) { + if (taosGenCrashJsonMsg(signum, &pMsg, lastClusterId, appInfo.startTime)) { + taosPrintLog(flags, level, dflag, "failed to generate crash json msg"); + } else { + msgLen = strlen(pMsg); + } + } + + taosLogCrashInfo("taos", pMsg, msgLen, signum, sigInfo); } + void taos_init_imp(void) { // In the APIs of other program language, taos_cleanup is not available yet. // So, to make sure taos_cleanup will be invoked to clean up the allocated resource to suppress the valgrind warning. @@ -567,8 +539,6 @@ void taos_init_imp(void) { return; } - tscSetSignalHandle(); - initQueryModuleMsgHandle(); if (taosConvInit() != 0) { diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index 26d5d5fcc0949a99da2fcbd6b3ae7e4e3109cb59..a21d244c08ce55ad744abc360d75ecfa19fecda7 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -1239,7 +1239,7 @@ STscObj* taosConnectImpl(const char* user, const char* auth, const char* db, __t int64_t transporterId = 0; asyncSendMsgToServer(pTscObj->pAppInfo->pTransporter, &pTscObj->pAppInfo->mgmtEp.epSet, &transporterId, body); - + tsem_wait(&pRequest->body.rspSem); if (pRequest->code != TSDB_CODE_SUCCESS) { const char* errorMsg = diff --git a/source/client/src/clientMain.c b/source/client/src/clientMain.c index 1179f4d23a024c5657e6cc082143a919bcb7683c..7b04603f95c3880cb75b5a48cfe607aa05eba9f0 100644 --- a/source/client/src/clientMain.c +++ b/source/client/src/clientMain.c @@ -528,9 +528,8 @@ void taos_stop_query(TAOS_RES *res) { SRequestObj *pRequest = (SRequestObj *)res; pRequest->killed = true; - int32_t numOfFields = taos_num_fields(pRequest); // It is not a query, no need to stop. - if (numOfFields == 0) { + if (NULL == pRequest->pQuery || QUERY_EXEC_MODE_SCHEDULE != pRequest->pQuery->execMode) { tscDebug("request 0x%" PRIx64 " no need to be killed since not query", pRequest->requestId); return; } diff --git a/source/client/src/clientRawBlockWrite.c b/source/client/src/clientRawBlockWrite.c index c3dcdbd221c23b9feba1e97a949808e54cb346b8..c6f08d55c10bf953bcf76520149987ede27001ff 100644 --- a/source/client/src/clientRawBlockWrite.c +++ b/source/client/src/clientRawBlockWrite.c @@ -179,7 +179,7 @@ static char* buildAlterSTableJson(void* alterData, int32_t alterDataLen) { } string = cJSON_PrintUnformatted(json); - end: +end: cJSON_Delete(json); tFreeSMAltertbReq(&req); return string; @@ -200,7 +200,7 @@ static char* processCreateStb(SMqMetaRsp* metaRsp) { } string = buildCreateTableJson(&req.schemaRow, &req.schemaTag, req.name, req.suid, TSDB_SUPER_TABLE); - _err: +_err: tDecoderClear(&coder); return string; } @@ -220,7 +220,7 @@ static char* processAlterStb(SMqMetaRsp* metaRsp) { } string = buildAlterSTableJson(req.alterOriData, req.alterOriDataLen); - _err: +_err: tDecoderClear(&coder); return string; } @@ -302,7 +302,7 @@ static void buildChildElement(cJSON* json, SVCreateTbReq* pCreateReq) { cJSON_AddItemToArray(tags, tag); } - end: +end: cJSON_AddItemToObject(json, "tags", tags); taosArrayDestroy(pTagVals); } @@ -360,7 +360,7 @@ static char* processCreateTable(SMqMetaRsp* metaRsp) { } } - _exit: +_exit: for (int32_t iReq = 0; iReq < req.nReqs; iReq++) { pCreateReq = req.pReqs + iReq; taosMemoryFreeClear(pCreateReq->comment); @@ -373,7 +373,7 @@ static char* processCreateTable(SMqMetaRsp* metaRsp) { } static char* processAutoCreateTable(STaosxRsp* rsp) { - if(rsp->createTableNum <= 0){ + if (rsp->createTableNum <= 0) { uError("WriteRaw:processAutoCreateTable rsp->createTableNum <= 0"); goto _exit; } @@ -392,14 +392,14 @@ static char* processAutoCreateTable(STaosxRsp* rsp) { goto _exit; } - if(pCreateReq[iReq].type != TSDB_CHILD_TABLE){ + if (pCreateReq[iReq].type != TSDB_CHILD_TABLE) { uError("WriteRaw:processAutoCreateTable pCreateReq[iReq].type != TSDB_CHILD_TABLE"); goto _exit; } } string = buildCreateCTableJson(pCreateReq, rsp->createTableNum); - _exit: +_exit: for (int i = 0; i < rsp->createTableNum; i++) { tDecoderClear(&decoder[i]); taosMemoryFreeClear(pCreateReq[i].comment); @@ -500,7 +500,7 @@ static char* processAlterTable(SMqMetaRsp* metaRsp) { char* buf = NULL; if (vAlterTbReq.tagType == TSDB_DATA_TYPE_JSON) { - if(!tTagIsJson(vAlterTbReq.pTagVal)){ + if (!tTagIsJson(vAlterTbReq.pTagVal)) { uError("processAlterTable isJson false"); goto _exit; } @@ -524,7 +524,7 @@ static char* processAlterTable(SMqMetaRsp* metaRsp) { } string = cJSON_PrintUnformatted(json); - _exit: +_exit: cJSON_Delete(json); tDecoderClear(&decoder); return string; @@ -557,12 +557,12 @@ static char* processDropSTable(SMqMetaRsp* metaRsp) { string = cJSON_PrintUnformatted(json); - _exit: +_exit: cJSON_Delete(json); tDecoderClear(&decoder); return string; } -static char* processDeleteTable(SMqMetaRsp* metaRsp){ +static char* processDeleteTable(SMqMetaRsp* metaRsp) { SDeleteRes req = {0}; SDecoder coder = {0}; int32_t code = TSDB_CODE_SUCCESS; @@ -596,7 +596,7 @@ static char* processDeleteTable(SMqMetaRsp* metaRsp){ string = cJSON_PrintUnformatted(json); - _exit: +_exit: cJSON_Delete(json); tDecoderClear(&coder); return string; @@ -638,7 +638,7 @@ static char* processDropTable(SMqMetaRsp* metaRsp) { string = cJSON_PrintUnformatted(json); - _exit: +_exit: cJSON_Delete(json); tDecoderClear(&decoder); return string; @@ -726,7 +726,7 @@ static int32_t taosCreateStb(TAOS* taos, void* meta, int32_t metaLen) { code = pRequest->code; taosMemoryFree(pCmdMsg.pMsg); - end: +end: destroyRequest(pRequest); tFreeSMCreateStbReq(&pReq); tDecoderClear(&coder); @@ -796,7 +796,7 @@ static int32_t taosDropStb(TAOS* taos, void* meta, int32_t metaLen) { code = pRequest->code; taosMemoryFree(pCmdMsg.pMsg); - end: +end: destroyRequest(pRequest); tDecoderClear(&coder); return code; @@ -857,9 +857,9 @@ static int32_t taosCreateTable(TAOS* taos, void* meta, int32_t metaLen) { taosHashSetFreeFp(pVgroupHashmap, destroyCreateTbReqBatch); SRequestConnInfo conn = {.pTrans = pTscObj->pAppInfo->pTransporter, - .requestId = pRequest->requestId, - .requestObjRefId = pRequest->self, - .mgmtEps = getEpSet_s(&pTscObj->pAppInfo->mgmtEp)}; + .requestId = pRequest->requestId, + .requestObjRefId = pRequest->self, + .mgmtEps = getEpSet_s(&pTscObj->pAppInfo->mgmtEp)}; pRequest->tableList = taosArrayInit(req.nReqs, sizeof(SName)); // loop to create table @@ -939,7 +939,7 @@ static int32_t taosCreateTable(TAOS* taos, void* meta, int32_t metaLen) { code = pRequest->code; - end: +end: for (int32_t iReq = 0; iReq < req.nReqs; iReq++) { pCreateReq = req.pReqs + iReq; taosMemoryFreeClear(pCreateReq->comment); @@ -1009,9 +1009,9 @@ static int32_t taosDropTable(TAOS* taos, void* meta, int32_t metaLen) { taosHashSetFreeFp(pVgroupHashmap, destroyDropTbReqBatch); SRequestConnInfo conn = {.pTrans = pTscObj->pAppInfo->pTransporter, - .requestId = pRequest->requestId, - .requestObjRefId = pRequest->self, - .mgmtEps = getEpSet_s(&pTscObj->pAppInfo->mgmtEp)}; + .requestId = pRequest->requestId, + .requestObjRefId = pRequest->self, + .mgmtEps = getEpSet_s(&pTscObj->pAppInfo->mgmtEp)}; pRequest->tableList = taosArrayInit(req.nReqs, sizeof(SName)); // loop to create table for (int32_t iReq = 0; iReq < req.nReqs; iReq++) { @@ -1063,7 +1063,7 @@ static int32_t taosDropTable(TAOS* taos, void* meta, int32_t metaLen) { } code = pRequest->code; - end: +end: taosHashCleanup(pVgroupHashmap); destroyRequest(pRequest); tDecoderClear(&coder); @@ -1131,7 +1131,7 @@ static int32_t taosDeleteData(TAOS* taos, void* meta, int32_t metaLen) { } taos_free_result(res); - end: +end: tDecoderClear(&coder); return code; } @@ -1178,9 +1178,9 @@ static int32_t taosAlterTable(TAOS* taos, void* meta, int32_t metaLen) { } SRequestConnInfo conn = {.pTrans = pTscObj->pAppInfo->pTransporter, - .requestId = pRequest->requestId, - .requestObjRefId = pRequest->self, - .mgmtEps = getEpSet_s(&pTscObj->pAppInfo->mgmtEp)}; + .requestId = pRequest->requestId, + .requestObjRefId = pRequest->self, + .mgmtEps = getEpSet_s(&pTscObj->pAppInfo->mgmtEp)}; SVgroupInfo pInfo = {0}; SName pName = {0}; @@ -1239,7 +1239,7 @@ static int32_t taosAlterTable(TAOS* taos, void* meta, int32_t metaLen) { code = handleAlterTbExecRes(pRes->res, pCatalog); } } - end: +end: taosArrayDestroy(pArray); if (pVgData) taosMemoryFreeClear(pVgData->pData); taosMemoryFreeClear(pVgData); @@ -1402,7 +1402,7 @@ int taos_write_raw_block(TAOS* taos, int rows, char* pData, const char* tbname) launchQueryImpl(pRequest, pQuery, true, NULL); code = pRequest->code; - end: +end: taosMemoryFreeClear(pTableMeta); qDestroyQuery(pQuery); destroyRequest(pRequest); @@ -1532,7 +1532,7 @@ static int32_t tmqWriteRawDataImpl(TAOS* taos, void* data, int32_t dataLen) { launchQueryImpl(pRequest, pQuery, true, NULL); code = pRequest->code; - end: +end: tDeleteSMqDataRsp(&rspObj.rsp); tDecoderClear(&decoder); qDestroyQuery(pQuery); @@ -1631,7 +1631,7 @@ static int32_t tmqWriteRawMetaDataImpl(TAOS* taos, void* data, int32_t dataLen) goto end; } - if(pCreateReq.type != TSDB_CHILD_TABLE){ + if (pCreateReq.type != TSDB_CHILD_TABLE) { uError("WriteRaw:pCreateReq.type != TSDB_CHILD_TABLE. table name: %s", tbName); code = TSDB_CODE_TSC_INVALID_VALUE; goto end; diff --git a/source/common/src/tdataformat.c b/source/common/src/tdataformat.c index 5e001a96872306d9369d388f8823a2baecdbf04f..7b3038280acfab0313f61bdb407fbda0a6751c27 100644 --- a/source/common/src/tdataformat.c +++ b/source/common/src/tdataformat.c @@ -1532,10 +1532,6 @@ STSchema *tBuildTSchema(SSchema *aSchema, int32_t numOfCols, int32_t version) { return pTSchema; } -void tDestroyTSchema(STSchema *pTSchema) { - if (pTSchema) taosMemoryFree(pTSchema); -} - // SColData ======================================== void tColDataDestroy(void *ph) { SColData *pColData = (SColData *)ph; diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index 20bb265e783b5149c0364ad7e0b567b6a5ca678f..95f74da8034eb70fff4c09802e55e6f754b79323 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -2821,8 +2821,8 @@ int32_t tSerializeSDbCfgRsp(void *buf, int32_t bufLen, const SDbCfgRsp *pRsp) { if (tEncodeI8(&encoder, pRetension->keepUnit) < 0) return -1; } if (tEncodeI8(&encoder, pRsp->schemaless) < 0) return -1; + if (tEncodeI16(&encoder, pRsp->sstTrigger) < 0) return -1; tEndEncode(&encoder); - int32_t tlen = encoder.pos; tEncoderClear(&encoder); return tlen; @@ -2873,6 +2873,7 @@ int32_t tDeserializeSDbCfgRsp(void *buf, int32_t bufLen, SDbCfgRsp *pRsp) { } } if (tDecodeI8(&decoder, &pRsp->schemaless) < 0) return -1; + if (tDecodeI16(&decoder, &pRsp->sstTrigger) < 0) return -1; tEndDecode(&decoder); tDecoderClear(&decoder); diff --git a/source/dnode/mnode/impl/src/mndDb.c b/source/dnode/mnode/impl/src/mndDb.c index 7e5c29d56f11211b9475834e8c4a666547056398..bdfda14a32772644a476b6ad6cbdc7d0e3989795 100644 --- a/source/dnode/mnode/impl/src/mndDb.c +++ b/source/dnode/mnode/impl/src/mndDb.c @@ -889,7 +889,7 @@ static int32_t mndProcessGetDbCfgReq(SRpcMsg *pReq) { cfgRsp.numOfRetensions = pDb->cfg.numOfRetensions; cfgRsp.pRetensions = pDb->cfg.pRetensions; cfgRsp.schemaless = pDb->cfg.schemaless; - + cfgRsp.sstTrigger = pDb->cfg.sstTrigger; int32_t contLen = tSerializeSDbCfgRsp(NULL, 0, &cfgRsp); void *pRsp = rpcMallocCont(contLen); if (pRsp == NULL) { diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index 33325d41f3c5905a4471c4225e4ee23537c8b953..fb1d8f1fd831a3baa5a6ef3263c9ca1f3530e186 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -153,6 +153,8 @@ typedef struct SMTbCursor SMTbCursor; SMTbCursor *metaOpenTbCursor(SMeta *pMeta); void metaCloseTbCursor(SMTbCursor *pTbCur); int32_t metaTbCursorNext(SMTbCursor *pTbCur, ETableType jumpTableType); +int32_t metaTbCursorPrev(SMTbCursor *pTbCur); + #endif // tsdb diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index 8c8775548d21702a31d4adc7e9d7f1c6aa30fa4c..6ff085c8f1ad0ad95a0e8bec30395894ec1740c0 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -206,6 +206,7 @@ int32_t tsdbCmprColData(SColData *pColData, int8_t cmprAlg, SBlockCol *pBlockCol uint8_t **ppBuf); int32_t tsdbDecmprColData(uint8_t *pIn, SBlockCol *pBlockCol, int8_t cmprAlg, int32_t nVal, SColData *pColData, uint8_t **ppBuf); +int32_t tRowInfoCmprFn(const void *p1, const void *p2); // tsdbMemTable ============================================================================================== // SMemTable int32_t tsdbMemTableCreate(STsdb *pTsdb, SMemTable **ppMemTable); diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 9c5f47b731e2393aca1d21099fbfec6373d97cd0..84330b279f4f19b03a3a4fba614b8a775677b629 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -252,7 +252,7 @@ int32_t tsdbSnapReaderClose(STsdbSnapReader** ppReader); int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData); // STsdbSnapWriter ======================================== int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWriter** ppWriter); -int32_t tsdbSnapWrite(STsdbSnapWriter* pWriter, uint8_t* pData, uint32_t nData); +int32_t tsdbSnapWrite(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr); int32_t tsdbSnapWriterPrepareClose(STsdbSnapWriter* pWriter); int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback); // STqSnapshotReader == diff --git a/source/dnode/vnode/src/meta/metaQuery.c b/source/dnode/vnode/src/meta/metaQuery.c index 4b280a32f1f5915876dbeb8fd0ff4b6beaa4c658..6741b7ca456e78a99f326a77028511039c37b2a0 100644 --- a/source/dnode/vnode/src/meta/metaQuery.c +++ b/source/dnode/vnode/src/meta/metaQuery.c @@ -310,7 +310,7 @@ void metaCloseTbCursor(SMTbCursor *pTbCur) { } } -int metaTbCursorNext(SMTbCursor *pTbCur, ETableType jumpTableType) { +int32_t metaTbCursorNext(SMTbCursor *pTbCur, ETableType jumpTableType) { int ret; void *pBuf; STbCfg tbCfg; @@ -334,6 +334,30 @@ int metaTbCursorNext(SMTbCursor *pTbCur, ETableType jumpTableType) { return 0; } +int32_t metaTbCursorPrev(SMTbCursor *pTbCur) { + int ret; + void *pBuf; + STbCfg tbCfg; + + for (;;) { + ret = tdbTbcPrev(pTbCur->pDbc, &pTbCur->pKey, &pTbCur->kLen, &pTbCur->pVal, &pTbCur->vLen); + if (ret < 0) { + return -1; + } + + tDecoderClear(&pTbCur->mr.coder); + + metaGetTableEntryByVersion(&pTbCur->mr, ((SUidIdxVal *)pTbCur->pVal)[0].version, *(tb_uid_t *)pTbCur->pKey); + if (pTbCur->mr.me.type == TSDB_SUPER_TABLE) { + continue; + } + + break; + } + + return 0; +} + SSchemaWrapper *metaGetTableSchema(SMeta *pMeta, tb_uid_t uid, int32_t sver, int lock) { void *pData = NULL; int nData = 0; @@ -682,9 +706,8 @@ int32_t metaGetTbTSchemaEx(SMeta *pMeta, tb_uid_t suid, tb_uid_t uid, int32_t sv } } - if (sver <= 0) { - metaError("meta/query: incorrect sver: %" PRId32 ".", sver); - code = TSDB_CODE_FAILED; + if (ASSERTS(sver > 0, __FILE__, __LINE__, "failed to get table schema version: %d", sver)) { + code = TSDB_CODE_NOT_FOUND; goto _exit; } diff --git a/source/dnode/vnode/src/sma/smaSnapshot.c b/source/dnode/vnode/src/sma/smaSnapshot.c index de3d93395aa3790dcd231b461470ee35462abb46..c00e96a06664db0a60184fdb09e16ee0b68c3d45 100644 --- a/source/dnode/vnode/src/sma/smaSnapshot.c +++ b/source/dnode/vnode/src/sma/smaSnapshot.c @@ -446,10 +446,10 @@ int32_t rsmaSnapWrite(SRSmaSnapWriter* pWriter, uint8_t* pData, uint32_t nData) // rsma1/rsma2 if (pHdr->type == SNAP_DATA_RSMA1) { pHdr->type = SNAP_DATA_TSDB; - code = tsdbSnapWrite(pWriter->pDataWriter[0], pData, nData); + code = tsdbSnapWrite(pWriter->pDataWriter[0], pHdr); } else if (pHdr->type == SNAP_DATA_RSMA2) { pHdr->type = SNAP_DATA_TSDB; - code = tsdbSnapWrite(pWriter->pDataWriter[1], pData, nData); + code = tsdbSnapWrite(pWriter->pDataWriter[1], pHdr); } else if (pHdr->type == SNAP_DATA_QTASK) { code = rsmaSnapWriteQTaskInfo(pWriter, pData, nData); } else { diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index bda0256b517cd39fc4ac570f689569b0c6329c21..12cfd3a9e33eea324f45298a002e268ccf7dfa91 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -520,7 +520,12 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { tqOffsetResetToData(&fetchOffsetNew, 0, 0); } } else { - tqOffsetResetToLog(&fetchOffsetNew, walGetFirstVer(pTq->pVnode->pWal)); + pHandle->pRef = walRefFirstVer(pTq->pVnode->pWal, pHandle->pRef); + if (pHandle->pRef == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + tqOffsetResetToLog(&fetchOffsetNew, pHandle->pRef->refVer - 1); } } else if (reqOffset.type == TMQ_OFFSET__RESET_LATEST) { if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { diff --git a/source/dnode/vnode/src/tq/tqOffsetSnapshot.c b/source/dnode/vnode/src/tq/tqOffsetSnapshot.c index 2413a792c60c36a059e5411f2ff51d3792c12aa0..a4428aed4368fec9c96a2faae5fabd33cd8eb8f4 100644 --- a/source/dnode/vnode/src/tq/tqOffsetSnapshot.c +++ b/source/dnode/vnode/src/tq/tqOffsetSnapshot.c @@ -56,7 +56,7 @@ int32_t tqOffsetSnapRead(STqOffsetReader* pReader, uint8_t** ppData) { TdFilePtr pFile = taosOpenFile(fname, TD_FILE_READ); if (pFile == NULL) { taosMemoryFree(fname); - return -1; + return 0; } int64_t sz = 0; diff --git a/source/dnode/vnode/src/tsdb/tsdbCacheRead.c b/source/dnode/vnode/src/tsdb/tsdbCacheRead.c index 6a5acecfc3226753108863e3537f281028c8a34f..a2de1bdf4e1251d4f4276603da1d490a9c214692 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCacheRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbCacheRead.c @@ -268,7 +268,10 @@ int32_t tsdbRetrieveCacheRows(void* pReader, SSDataBlock* pResBlock, const int32 } taosThreadMutexLock(&pr->readerMutex); - tsdbTakeReadSnap((STsdbReader*)pr, tsdbCacheQueryReseek, &pr->pReadSnap); + code = tsdbTakeReadSnap((STsdbReader*)pr, tsdbCacheQueryReseek, &pr->pReadSnap); + if (code != TSDB_CODE_SUCCESS) { + goto _end; + } pr->pDataFReader = NULL; pr->pDataFReaderLast = NULL; @@ -279,7 +282,7 @@ int32_t tsdbRetrieveCacheRows(void* pReader, SSDataBlock* pResBlock, const int32 code = doExtractCacheRow(pr, lruCache, pKeyInfo->uid, &pRow, &h); if (code != TSDB_CODE_SUCCESS) { - return code; + goto _end; } if (h == NULL) { @@ -352,7 +355,7 @@ int32_t tsdbRetrieveCacheRows(void* pReader, SSDataBlock* pResBlock, const int32 STableKeyInfo* pKeyInfo = &pr->pTableList[i]; code = doExtractCacheRow(pr, lruCache, pKeyInfo->uid, &pRow, &h); if (code != TSDB_CODE_SUCCESS) { - return code; + goto _end; } if (h == NULL) { diff --git a/source/dnode/vnode/src/tsdb/tsdbFS.c b/source/dnode/vnode/src/tsdb/tsdbFS.c index 7dc839773f11b6bb823375f35281977e94b74f8b..51fdc69a95cc0a935a6b6e2abd390d5927384a50 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS.c @@ -458,9 +458,8 @@ static int32_t tsdbMergeFileSet(STsdb *pTsdb, SDFileSet *pSetOld, SDFileSet *pSe taosMemoryFree(pHeadF); } } else { - nRef = pHeadF->nRef; - *pHeadF = *pSetNew->pHeadF; - pHeadF->nRef = nRef; + ASSERT(pHeadF->offset == pSetNew->pHeadF->offset); + ASSERT(pHeadF->size == pSetNew->pHeadF->size); } // data @@ -481,9 +480,7 @@ static int32_t tsdbMergeFileSet(STsdb *pTsdb, SDFileSet *pSetOld, SDFileSet *pSe taosMemoryFree(pDataF); } } else { - nRef = pDataF->nRef; - *pDataF = *pSetNew->pDataF; - pDataF->nRef = nRef; + pDataF->size = pSetNew->pDataF->size; } // sma @@ -504,9 +501,7 @@ static int32_t tsdbMergeFileSet(STsdb *pTsdb, SDFileSet *pSetOld, SDFileSet *pSe taosMemoryFree(pSmaF); } } else { - nRef = pSmaF->nRef; - *pSmaF = *pSetNew->pSmaF; - pSmaF->nRef = nRef; + pSmaF->size = pSetNew->pSmaF->size; } // stt diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index d8c379f476943d64de5c93d9643b04ebfa832322..a2212292a77cfc664254a8a24af071dde6fbd2bf 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -573,6 +573,68 @@ static SSDataBlock* createResBlock(SQueryTableDataCond* pCond, int32_t capacity) return pResBlock; } +static int32_t tsdbInitReaderLock(STsdbReader* pReader) { + int32_t code = -1; + qTrace("tsdb/read: %p, pre-init read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); + + code = taosThreadMutexInit(&pReader->readerMutex, NULL); + + qTrace("tsdb/read: %p, post-init read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); + + return code; +} + +static int32_t tsdbUninitReaderLock(STsdbReader* pReader) { + int32_t code = -1; + qTrace("tsdb/read: %p, pre-uninit read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); + + code = taosThreadMutexDestroy(&pReader->readerMutex); + + qTrace("tsdb/read: %p, post-uninit read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); + + return code; +} + +static int32_t tsdbAcquireReader(STsdbReader* pReader) { + int32_t code = -1; + qTrace("tsdb/read: %p, pre-take read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); + + code = taosThreadMutexLock(&pReader->readerMutex); + + qTrace("tsdb/read: %p, post-take read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); + + return code; +} + +static int32_t tsdbTryAcquireReader(STsdbReader* pReader) { + int32_t code = -1; + qTrace("tsdb/read: %p, pre-trytake read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); + + code = taosThreadMutexTryLock(&pReader->readerMutex); + + qTrace("tsdb/read: %p, post-trytake read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); + + return code; +} + +static int32_t tsdbReleaseReader(STsdbReader* pReader) { + int32_t code = -1; + qTrace("tsdb/read: %p, pre-untake read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); + + code = taosThreadMutexUnlock(&pReader->readerMutex); + + qTrace("tsdb/read: %p, post-untake read mutex: %p, code: %d", pReader, &pReader->readerMutex, code); + + return code; +} + +void tsdbReleaseDataBlock(STsdbReader* pReader) { + SReaderStatus* pStatus = &pReader->status; + if (!pStatus->composedDataBlock) { + tsdbReleaseReader(pReader); + } +} + static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, STsdbReader** ppReader, int32_t capacity, SSDataBlock* pResBlock, const char* idstr) { int32_t code = 0; @@ -636,7 +698,7 @@ static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, STsd setColumnIdSlotList(&pReader->suppInfo, pCond->colList, pCond->pSlotList, pCond->numOfCols); - taosThreadMutexInit(&pReader->readerMutex, NULL); + tsdbInitReaderLock(pReader); *ppReader = pReader; return code; @@ -1776,12 +1838,15 @@ static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo* } if (minKey == k.ts) { + STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); + if (pSchema == NULL) { + return terrno; + } if (init) { - tsdbRowMerge(&merge, pRow); + tsdbRowMergerAdd(&merge, pRow, pSchema); } else { init = true; - STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); - int32_t code = tsdbRowMergerInit(&merge, pRow, pSchema); + int32_t code = tsdbRowMergerInit(&merge, pRow, pSchema); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -2882,7 +2947,7 @@ static int32_t doBuildDataBlock(STsdbReader* pReader) { if (pResBlock->info.rows > 0) { tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRIu64 "-%" PRIu64 - " rows:%d, elapsed time:%.2f ms %s", + " rows:%d, elapsed time:%.2f ms %s", pReader, pResBlock->info.id.uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, pResBlock->info.rows, el, pReader->idStr); } @@ -2932,7 +2997,7 @@ static int32_t doBuildDataBlock(STsdbReader* pReader) { if (pResBlock->info.rows > 0) { tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRIu64 "-%" PRIu64 - " rows:%d, elapsed time:%.2f ms %s", + " rows:%d, elapsed time:%.2f ms %s", pReader, pResBlock->info.id.uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, pResBlock->info.rows, el, pReader->idStr); } @@ -4013,8 +4078,9 @@ void tsdbReaderClose(STsdbReader* pReader) { qTrace("tsdb/reader: %p, untake snapshot", pReader); tsdbUntakeReadSnap(pReader, pReader->pReadSnap, true); + pReader->pReadSnap = NULL; - taosThreadMutexDestroy(&pReader->readerMutex); + tsdbUninitReaderLock(pReader); taosMemoryFree(pReader->status.uidCheckInfo.tableUidList); SIOCostSummary* pCost = &pReader->cost; @@ -4096,6 +4162,28 @@ int32_t tsdbReaderSuspend(STsdbReader* pReader) { // pInfo->lastKey = ts; } } else { + // resetDataBlockScanInfo excluding lastKey + STableBlockScanInfo** p = NULL; + + while ((p = taosHashIterate(pStatus->pTableMap, p)) != NULL) { + STableBlockScanInfo* pInfo = *(STableBlockScanInfo**)p; + + pInfo->iterInit = false; + pInfo->iter.hasVal = false; + pInfo->iiter.hasVal = false; + + if (pInfo->iter.iter != NULL) { + pInfo->iter.iter = tsdbTbDataIterDestroy(pInfo->iter.iter); + } + + if (pInfo->iiter.iter != NULL) { + pInfo->iiter.iter = tsdbTbDataIterDestroy(pInfo->iiter.iter); + } + + pInfo->delSkyline = taosArrayDestroy(pInfo->delSkyline); + // pInfo->lastKey = ts; + } + pBlockScanInfo = pStatus->pTableIter == NULL ? NULL : *pStatus->pTableIter; if (pBlockScanInfo) { // save lastKey to restore memory iterator @@ -4104,7 +4192,8 @@ int32_t tsdbReaderSuspend(STsdbReader* pReader) { // reset current current table's data block scan info, pBlockScanInfo->iterInit = false; - // pBlockScanInfo->iiter.hasVal = false; + pBlockScanInfo->iter.hasVal = false; + pBlockScanInfo->iiter.hasVal = false; if (pBlockScanInfo->iter.iter != NULL) { pBlockScanInfo->iter.iter = tsdbTbDataIterDestroy(pBlockScanInfo->iter.iter); } @@ -4138,16 +4227,16 @@ static int32_t tsdbSetQueryReseek(void* pQHandle) { int32_t code = 0; STsdbReader* pReader = pQHandle; - code = taosThreadMutexTryLock(&pReader->readerMutex); + code = tsdbTryAcquireReader(pReader); if (code == 0) { if (pReader->suspended) { - taosThreadMutexUnlock(&pReader->readerMutex); + tsdbReleaseReader(pReader); return code; } tsdbReaderSuspend(pReader); - taosThreadMutexUnlock(&pReader->readerMutex); + tsdbReleaseReader(pReader); return code; } else if (code == EBUSY) { @@ -4248,8 +4337,9 @@ bool tsdbNextDataBlock(STsdbReader* pReader) { SReaderStatus* pStatus = &pReader->status; - qTrace("tsdb/read: %p, take read mutex", pReader); - taosThreadMutexLock(&pReader->readerMutex); + int32_t code = tsdbAcquireReader(pReader); + qTrace("tsdb/read: %p, take read mutex, code: %d", pReader, code); + if (pReader->suspended) { tsdbReaderResume(pReader); } @@ -4261,7 +4351,7 @@ bool tsdbNextDataBlock(STsdbReader* pReader) { pStatus = &pReader->innerReader[0]->status; if (pStatus->composedDataBlock) { qTrace("tsdb/read: %p, unlock read mutex", pReader); - taosThreadMutexUnlock(&pReader->readerMutex); + tsdbReleaseReader(pReader); } return ret; @@ -4284,7 +4374,7 @@ bool tsdbNextDataBlock(STsdbReader* pReader) { if (ret) { if (pStatus->composedDataBlock) { qTrace("tsdb/read: %p, unlock read mutex", pReader); - taosThreadMutexUnlock(&pReader->readerMutex); + tsdbReleaseReader(pReader); } return ret; @@ -4304,7 +4394,7 @@ bool tsdbNextDataBlock(STsdbReader* pReader) { pStatus = &pReader->innerReader[1]->status; if (pStatus->composedDataBlock) { qTrace("tsdb/read: %p, unlock read mutex", pReader); - taosThreadMutexUnlock(&pReader->readerMutex); + tsdbReleaseReader(pReader); } return ret1; @@ -4312,7 +4402,7 @@ bool tsdbNextDataBlock(STsdbReader* pReader) { } qTrace("tsdb/read: %p, unlock read mutex", pReader); - taosThreadMutexUnlock(&pReader->readerMutex); + tsdbReleaseReader(pReader); return false; } @@ -4471,13 +4561,6 @@ static SSDataBlock* doRetrieveDataBlock(STsdbReader* pReader) { return pReader->pResBlock; } -void tsdbReleaseDataBlock(STsdbReader* pReader) { - // SReaderStatus* pStatus = &pReader->status; - // if (!pStatus->composedDataBlock) { - taosThreadMutexUnlock(&pReader->readerMutex); - //} -} - SSDataBlock* tsdbRetrieveDataBlock(STsdbReader* pReader, SArray* pIdList) { STsdbReader* pTReader = pReader; if (pReader->type == TIMEWINDOW_RANGE_EXTERNAL) { @@ -4496,7 +4579,7 @@ SSDataBlock* tsdbRetrieveDataBlock(STsdbReader* pReader, SArray* pIdList) { SSDataBlock* ret = doRetrieveDataBlock(pTReader); qTrace("tsdb/read-retrieve: %p, unlock read mutex", pReader); - taosThreadMutexUnlock(&pReader->readerMutex); + tsdbReleaseReader(pReader); return ret; } @@ -4505,7 +4588,7 @@ int32_t tsdbReaderReset(STsdbReader* pReader, SQueryTableDataCond* pCond) { SReaderStatus* pStatus = &pReader->status; qTrace("tsdb/reader-reset: %p, take read mutex", pReader); - taosThreadMutexLock(&pReader->readerMutex); + tsdbAcquireReader(pReader); if (pReader->suspended) { tsdbReaderResume(pReader); @@ -4514,7 +4597,7 @@ int32_t tsdbReaderReset(STsdbReader* pReader, SQueryTableDataCond* pCond) { if (isEmptyQueryTimeWindow(&pReader->window) || pReader->pReadSnap == NULL) { tsdbDebug("tsdb reader reset return %p", pReader->pReadSnap); - taosThreadMutexUnlock(&pReader->readerMutex); + tsdbReleaseReader(pReader); return TSDB_CODE_SUCCESS; } @@ -4552,7 +4635,7 @@ int32_t tsdbReaderReset(STsdbReader* pReader, SQueryTableDataCond* pCond) { tsdbError("%p reset reader failed, numOfTables:%d, query range:%" PRId64 " - %" PRId64 " in query %s", pReader, numOfTables, pReader->window.skey, pReader->window.ekey, pReader->idStr); - taosThreadMutexUnlock(&pReader->readerMutex); + tsdbReleaseReader(pReader); return code; } @@ -4563,7 +4646,7 @@ int32_t tsdbReaderReset(STsdbReader* pReader, SQueryTableDataCond* pCond) { pReader, pReader->suid, numOfTables, pCond->twindows.skey, pReader->window.skey, pReader->window.ekey, pReader->idStr); - taosThreadMutexUnlock(&pReader->readerMutex); + tsdbReleaseReader(pReader); return code; } @@ -4648,7 +4731,7 @@ int64_t tsdbGetNumOfRowsInMemTable(STsdbReader* pReader) { int64_t rows = 0; SReaderStatus* pStatus = &pReader->status; - taosThreadMutexLock(&pReader->readerMutex); + tsdbAcquireReader(pReader); if (pReader->suspended) { tsdbReaderResume(pReader); } @@ -4678,7 +4761,7 @@ int64_t tsdbGetNumOfRowsInMemTable(STsdbReader* pReader) { pStatus->pTableIter = taosHashIterate(pStatus->pTableMap, pStatus->pTableIter); } - taosThreadMutexUnlock(&pReader->readerMutex); + tsdbReleaseReader(pReader); return rows; } diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index 5c7bfee5a7bc07bdf1e91015e4d3d36e5873d7ec..c280e8c0e747f1ce450c187d5e3d51183ca02973 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -15,274 +15,628 @@ #include "tsdb.h" -// STsdbSnapReader ======================================== -typedef enum { SNAP_DATA_FILE_ITER = 0, SNAP_STT_FILE_ITER } EFIterT; +extern int32_t tsdbReadDataBlockEx(SDataFReader* pReader, SDataBlk* pDataBlk, SBlockData* pBlockData); +extern int32_t tsdbUpdateTableSchema(SMeta* pMeta, int64_t suid, int64_t uid, SSkmInfo* pSkmInfo); +extern int32_t tsdbWriteDataBlock(SDataFWriter* pWriter, SBlockData* pBlockData, SMapData* mDataBlk, int8_t cmprAlg); +extern int32_t tsdbWriteSttBlock(SDataFWriter* pWriter, SBlockData* pBlockData, SArray* aSttBlk, int8_t cmprAlg); + +// STsdbDataIter2 ======================================== +#define TSDB_MEM_TABLE_DATA_ITER 0 +#define TSDB_DATA_FILE_DATA_ITER 1 +#define TSDB_STT_FILE_DATA_ITER 2 +#define TSDB_TOMB_FILE_DATA_ITER 3 + +typedef struct STsdbDataIter2 STsdbDataIter2; +typedef struct STsdbFilterInfo STsdbFilterInfo; + typedef struct { - SRBTreeNode n; - SRowInfo rInfo; - EFIterT type; + int64_t suid; + int64_t uid; + SDelData delData; +} SDelInfo; + +struct STsdbDataIter2 { + STsdbDataIter2* next; + SRBTreeNode rbtn; + + int32_t type; + SRowInfo rowInfo; + SDelInfo delInfo; union { + // TSDB_MEM_TABLE_DATA_ITER struct { - SArray* aBlockIdx; - int32_t iBlockIdx; - SBlockIdx* pBlockIdx; - SMapData mBlock; - int32_t iBlock; - }; // .data file + SMemTable* pMemTable; + } mIter; + + // TSDB_DATA_FILE_DATA_ITER struct { - int32_t iStt; - SArray* aSttBlk; - int32_t iSttBlk; - }; // .stt file + SDataFReader* pReader; + SArray* aBlockIdx; // SArray + SMapData mDataBlk; + SBlockData bData; + int32_t iBlockIdx; + int32_t iDataBlk; + int32_t iRow; + } dIter; + + // TSDB_STT_FILE_DATA_ITER + struct { + SDataFReader* pReader; + int32_t iStt; + SArray* aSttBlk; + SBlockData bData; + int32_t iSttBlk; + int32_t iRow; + } sIter; + // TSDB_TOMB_FILE_DATA_ITER + struct { + SDelFReader* pReader; + SArray* aDelIdx; + SArray* aDelData; + int32_t iDelIdx; + int32_t iDelData; + } tIter; }; - SBlockData bData; - int32_t iRow; -} SFDataIter; +}; -struct STsdbSnapReader { - STsdb* pTsdb; +#define TSDB_FILTER_FLAG_BY_VERSION 0x1 +struct STsdbFilterInfo { + int32_t flag; int64_t sver; int64_t ever; - STsdbFS fs; - int8_t type; - // for data file - int8_t dataDone; - int32_t fid; - SDataFReader* pDataFReader; - SFDataIter* pIter; - SRBTree rbt; - SFDataIter aFDataIter[TSDB_MAX_STT_TRIGGER + 1]; - SBlockData bData; - SSkmInfo skmTable; - // for del file - int8_t delDone; - SDelFReader* pDelFReader; - SArray* aDelIdx; // SArray - int32_t iDelIdx; - SArray* aDelData; // SArray - uint8_t* aBuf[5]; }; -extern int32_t tRowInfoCmprFn(const void* p1, const void* p2); -extern int32_t tsdbReadDataBlockEx(SDataFReader* pReader, SDataBlk* pDataBlk, SBlockData* pBlockData); -extern int32_t tsdbUpdateTableSchema(SMeta* pMeta, int64_t suid, int64_t uid, SSkmInfo* pSkmInfo); +#define TSDB_RBTN_TO_DATA_ITER(pNode) ((STsdbDataIter2*)(((char*)pNode) - offsetof(STsdbDataIter2, rbtn))) -static int32_t tFDataIterCmprFn(const SRBTreeNode* pNode1, const SRBTreeNode* pNode2) { - SFDataIter* pIter1 = (SFDataIter*)(((uint8_t*)pNode1) - offsetof(SFDataIter, n)); - SFDataIter* pIter2 = (SFDataIter*)(((uint8_t*)pNode2) - offsetof(SFDataIter, n)); +/* open */ +static int32_t tsdbOpenDataFileDataIter(SDataFReader* pReader, STsdbDataIter2** ppIter) { + int32_t code = 0; + int32_t lino = 0; - return tRowInfoCmprFn(&pIter1->rInfo, &pIter2->rInfo); + // create handle + STsdbDataIter2* pIter = (STsdbDataIter2*)taosMemoryCalloc(1, sizeof(*pIter)); + if (pIter == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + pIter->type = TSDB_DATA_FILE_DATA_ITER; + pIter->dIter.pReader = pReader; + if ((pIter->dIter.aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx))) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tBlockDataCreate(&pIter->dIter.bData); + TSDB_CHECK_CODE(code, lino, _exit); + + pIter->dIter.iBlockIdx = 0; + pIter->dIter.iDataBlk = 0; + pIter->dIter.iRow = 0; + + // read data + code = tsdbReadBlockIdx(pReader, pIter->dIter.aBlockIdx); + TSDB_CHECK_CODE(code, lino, _exit); + + if (taosArrayGetSize(pIter->dIter.aBlockIdx) == 0) goto _clear; + +_exit: + if (code) { + if (pIter) { + _clear: + tBlockDataDestroy(&pIter->dIter.bData); + taosArrayDestroy(pIter->dIter.aBlockIdx); + taosMemoryFree(pIter); + pIter = NULL; + } + } + *ppIter = pIter; + return code; } -static int32_t tsdbSnapReadOpenFile(STsdbSnapReader* pReader) { +static int32_t tsdbOpenSttFileDataIter(SDataFReader* pReader, int32_t iStt, STsdbDataIter2** ppIter) { int32_t code = 0; int32_t lino = 0; - SDFileSet dFileSet = {.fid = pReader->fid}; - SDFileSet* pSet = taosArraySearch(pReader->fs.aDFileSet, &dFileSet, tDFileSetCmprFn, TD_GT); - if (pSet == NULL) return code; + // create handle + STsdbDataIter2* pIter = (STsdbDataIter2*)taosMemoryCalloc(1, sizeof(*pIter)); + if (pIter == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } - pReader->fid = pSet->fid; - code = tsdbDataFReaderOpen(&pReader->pDataFReader, pReader->pTsdb, pSet); - TSDB_CHECK_CODE(code, lino, _exit); + pIter->type = TSDB_STT_FILE_DATA_ITER; + pIter->sIter.pReader = pReader; + pIter->sIter.iStt = iStt; + pIter->sIter.aSttBlk = taosArrayInit(0, sizeof(SSttBlk)); + if (pIter->sIter.aSttBlk == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } - pReader->pIter = NULL; - tRBTreeCreate(&pReader->rbt, tFDataIterCmprFn); + code = tBlockDataCreate(&pIter->sIter.bData); + TSDB_CHECK_CODE(code, lino, _exit); - // .data file - SFDataIter* pIter = &pReader->aFDataIter[0]; - pIter->type = SNAP_DATA_FILE_ITER; + pIter->sIter.iSttBlk = 0; + pIter->sIter.iRow = 0; - code = tsdbReadBlockIdx(pReader->pDataFReader, pIter->aBlockIdx); + // read data + code = tsdbReadSttBlk(pReader, iStt, pIter->sIter.aSttBlk); TSDB_CHECK_CODE(code, lino, _exit); - for (pIter->iBlockIdx = 0; pIter->iBlockIdx < taosArrayGetSize(pIter->aBlockIdx); pIter->iBlockIdx++) { - pIter->pBlockIdx = (SBlockIdx*)taosArrayGet(pIter->aBlockIdx, pIter->iBlockIdx); + if (taosArrayGetSize(pIter->sIter.aSttBlk) == 0) goto _clear; - code = tsdbReadDataBlk(pReader->pDataFReader, pIter->pBlockIdx, &pIter->mBlock); +_exit: + if (code) { + if (pIter) { + _clear: + taosArrayDestroy(pIter->sIter.aSttBlk); + tBlockDataDestroy(&pIter->sIter.bData); + taosMemoryFree(pIter); + pIter = NULL; + } + } + *ppIter = pIter; + return code; +} + +static int32_t tsdbOpenTombFileDataIter(SDelFReader* pReader, STsdbDataIter2** ppIter) { + int32_t code = 0; + int32_t lino = 0; + + STsdbDataIter2* pIter = (STsdbDataIter2*)taosMemoryCalloc(1, sizeof(*pIter)); + if (pIter == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; TSDB_CHECK_CODE(code, lino, _exit); + } + pIter->type = TSDB_TOMB_FILE_DATA_ITER; - for (pIter->iBlock = 0; pIter->iBlock < pIter->mBlock.nItem; pIter->iBlock++) { - SDataBlk dataBlk; - tMapDataGetItemByIdx(&pIter->mBlock, pIter->iBlock, &dataBlk, tGetDataBlk); + pIter->tIter.pReader = pReader; + if ((pIter->tIter.aDelIdx = taosArrayInit(0, sizeof(SDelIdx))) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + if ((pIter->tIter.aDelData = taosArrayInit(0, sizeof(SDelData))) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } - if (dataBlk.minVer > pReader->ever || dataBlk.maxVer < pReader->sver) continue; + code = tsdbReadDelIdx(pReader, pIter->tIter.aDelIdx); + TSDB_CHECK_CODE(code, lino, _exit); - code = tsdbReadDataBlockEx(pReader->pDataFReader, &dataBlk, &pIter->bData); - TSDB_CHECK_CODE(code, lino, _exit); + if (taosArrayGetSize(pIter->tIter.aDelIdx) == 0) goto _clear; - ASSERT(pIter->pBlockIdx->suid == pIter->bData.suid); - ASSERT(pIter->pBlockIdx->uid == pIter->bData.uid); + pIter->tIter.iDelIdx = 0; + pIter->tIter.iDelData = 0; - for (pIter->iRow = 0; pIter->iRow < pIter->bData.nRow; pIter->iRow++) { - int64_t rowVer = pIter->bData.aVersion[pIter->iRow]; +_exit: + if (code) { + if (pIter) { + _clear: + taosArrayDestroy(pIter->tIter.aDelIdx); + taosArrayDestroy(pIter->tIter.aDelData); + taosMemoryFree(pIter); + pIter = NULL; + } + } + *ppIter = pIter; + return code; +} - if (rowVer >= pReader->sver && rowVer <= pReader->ever) { - pIter->rInfo.suid = pIter->pBlockIdx->suid; - pIter->rInfo.uid = pIter->pBlockIdx->uid; - pIter->rInfo.row = tsdbRowFromBlockData(&pIter->bData, pIter->iRow); - goto _add_iter_and_break; +/* close */ +static void tsdbCloseDataFileDataIter(STsdbDataIter2* pIter) { + tBlockDataDestroy(&pIter->dIter.bData); + tMapDataClear(&pIter->dIter.mDataBlk); + taosArrayDestroy(pIter->dIter.aBlockIdx); + taosMemoryFree(pIter); +} + +static void tsdbCloseSttFileDataIter(STsdbDataIter2* pIter) { + tBlockDataDestroy(&pIter->sIter.bData); + taosArrayDestroy(pIter->sIter.aSttBlk); + taosMemoryFree(pIter); +} + +static void tsdbCloseTombFileDataIter(STsdbDataIter2* pIter) { + taosArrayDestroy(pIter->tIter.aDelData); + taosArrayDestroy(pIter->tIter.aDelIdx); + taosMemoryFree(pIter); +} + +static void tsdbCloseDataIter2(STsdbDataIter2* pIter) { + if (pIter->type == TSDB_MEM_TABLE_DATA_ITER) { + ASSERT(0); + } else if (pIter->type == TSDB_DATA_FILE_DATA_ITER) { + tsdbCloseDataFileDataIter(pIter); + } else if (pIter->type == TSDB_STT_FILE_DATA_ITER) { + tsdbCloseSttFileDataIter(pIter); + } else if (pIter->type == TSDB_TOMB_FILE_DATA_ITER) { + tsdbCloseTombFileDataIter(pIter); + } else { + ASSERT(0); + } +} + +/* cmpr */ +static int32_t tsdbDataIterCmprFn(const SRBTreeNode* pNode1, const SRBTreeNode* pNode2) { + STsdbDataIter2* pIter1 = TSDB_RBTN_TO_DATA_ITER(pNode1); + STsdbDataIter2* pIter2 = TSDB_RBTN_TO_DATA_ITER(pNode2); + return tRowInfoCmprFn(&pIter1->rowInfo, &pIter2->rowInfo); +} + +/* seek */ + +/* iter next */ +static int32_t tsdbDataFileDataIterNext(STsdbDataIter2* pIter, STsdbFilterInfo* pFilterInfo) { + int32_t code = 0; + int32_t lino = 0; + + for (;;) { + while (pIter->dIter.iRow < pIter->dIter.bData.nRow) { + if (pFilterInfo) { + if (pFilterInfo->flag & TSDB_FILTER_FLAG_BY_VERSION) { + if (pIter->dIter.bData.aVersion[pIter->dIter.iRow] < pFilterInfo->sver || + pIter->dIter.bData.aVersion[pIter->dIter.iRow] > pFilterInfo->ever) { + pIter->dIter.iRow++; + continue; + } } } + + pIter->rowInfo.suid = pIter->dIter.bData.suid; + pIter->rowInfo.uid = pIter->dIter.bData.uid; + pIter->rowInfo.row = tsdbRowFromBlockData(&pIter->dIter.bData, pIter->dIter.iRow); + pIter->dIter.iRow++; + goto _exit; } - continue; + for (;;) { + while (pIter->dIter.iDataBlk < pIter->dIter.mDataBlk.nItem) { + SDataBlk dataBlk; + tMapDataGetItemByIdx(&pIter->dIter.mDataBlk, pIter->dIter.iDataBlk, &dataBlk, tGetDataBlk); + + // filter + if (pFilterInfo) { + if (pFilterInfo->flag & TSDB_FILTER_FLAG_BY_VERSION) { + if (pFilterInfo->sver > dataBlk.maxVer || pFilterInfo->ever < dataBlk.minVer) { + pIter->dIter.iDataBlk++; + continue; + } + } + } - _add_iter_and_break: - tRBTreePut(&pReader->rbt, (SRBTreeNode*)pIter); - break; - } + code = tsdbReadDataBlockEx(pIter->dIter.pReader, &dataBlk, &pIter->dIter.bData); + TSDB_CHECK_CODE(code, lino, _exit); - // .stt file - pIter = &pReader->aFDataIter[1]; - for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) { - pIter->type = SNAP_STT_FILE_ITER; - pIter->iStt = iStt; + pIter->dIter.iDataBlk++; + pIter->dIter.iRow = 0; - code = tsdbReadSttBlk(pReader->pDataFReader, iStt, pIter->aSttBlk); - TSDB_CHECK_CODE(code, lino, _exit); + break; + } - for (pIter->iSttBlk = 0; pIter->iSttBlk < taosArrayGetSize(pIter->aSttBlk); pIter->iSttBlk++) { - SSttBlk* pSttBlk = (SSttBlk*)taosArrayGet(pIter->aSttBlk, pIter->iSttBlk); + if (pIter->dIter.iRow < pIter->dIter.bData.nRow) break; - if (pSttBlk->minVer > pReader->ever) continue; - if (pSttBlk->maxVer < pReader->sver) continue; + for (;;) { + if (pIter->dIter.iBlockIdx < taosArrayGetSize(pIter->dIter.aBlockIdx)) { + SBlockIdx* pBlockIdx = taosArrayGet(pIter->dIter.aBlockIdx, pIter->dIter.iBlockIdx); - code = tsdbReadSttBlockEx(pReader->pDataFReader, iStt, pSttBlk, &pIter->bData); - TSDB_CHECK_CODE(code, lino, _exit); + code = tsdbReadDataBlk(pIter->dIter.pReader, pBlockIdx, &pIter->dIter.mDataBlk); + TSDB_CHECK_CODE(code, lino, _exit); - for (pIter->iRow = 0; pIter->iRow < pIter->bData.nRow; pIter->iRow++) { - int64_t rowVer = pIter->bData.aVersion[pIter->iRow]; + pIter->dIter.iBlockIdx++; + pIter->dIter.iDataBlk = 0; - if (rowVer >= pReader->sver && rowVer <= pReader->ever) { - pIter->rInfo.suid = pIter->bData.suid; - pIter->rInfo.uid = pIter->bData.uid ? pIter->bData.uid : pIter->bData.aUid[pIter->iRow]; - pIter->rInfo.row = tsdbRowFromBlockData(&pIter->bData, pIter->iRow); - goto _add_iter; + break; + } else { + pIter->rowInfo = (SRowInfo){0}; + goto _exit; } } } - - continue; - - _add_iter: - tRBTreePut(&pReader->rbt, (SRBTreeNode*)pIter); - pIter++; } _exit: if (code) { - tsdbError("vgId:%d, %s failed since %s", TD_VID(pReader->pTsdb->pVnode), __func__, tstrerror(code)); - } else { - tsdbInfo("vgId:%d, %s done, path:%s, fid:%d", TD_VID(pReader->pTsdb->pVnode), __func__, pReader->pTsdb->path, - pReader->fid); + tsdbError("%s failed at line %d since %s", __func__, lino, tstrerror(code)); } return code; } -static int32_t tsdbSnapNextRow(STsdbSnapReader* pReader) { +static int32_t tsdbSttFileDataIterNext(STsdbDataIter2* pIter, STsdbFilterInfo* pFilterInfo) { int32_t code = 0; + int32_t lino = 0; - if (pReader->pIter) { - SFDataIter* pIter = NULL; - while (true) { - _find_row: - pIter = pReader->pIter; - for (pIter->iRow++; pIter->iRow < pIter->bData.nRow; pIter->iRow++) { - int64_t rowVer = pIter->bData.aVersion[pIter->iRow]; - - if (rowVer >= pReader->sver && rowVer <= pReader->ever) { - pIter->rInfo.suid = pIter->bData.suid; - pIter->rInfo.uid = pIter->bData.uid ? pIter->bData.uid : pIter->bData.aUid[pIter->iRow]; - pIter->rInfo.row = tsdbRowFromBlockData(&pIter->bData, pIter->iRow); - goto _out; + for (;;) { + while (pIter->sIter.iRow < pIter->sIter.bData.nRow) { + if (pFilterInfo) { + if (pFilterInfo->flag & TSDB_FILTER_FLAG_BY_VERSION) { + if (pFilterInfo->sver > pIter->sIter.bData.aVersion[pIter->sIter.iRow] || + pFilterInfo->ever < pIter->sIter.bData.aVersion[pIter->sIter.iRow]) { + pIter->sIter.iRow++; + continue; + } } } - if (pIter->type == SNAP_DATA_FILE_ITER) { - while (true) { - for (pIter->iBlock++; pIter->iBlock < pIter->mBlock.nItem; pIter->iBlock++) { - SDataBlk dataBlk; - tMapDataGetItemByIdx(&pIter->mBlock, pIter->iBlock, &dataBlk, tGetDataBlk); - - if (dataBlk.minVer > pReader->ever || dataBlk.maxVer < pReader->sver) continue; + pIter->rowInfo.suid = pIter->sIter.bData.suid; + pIter->rowInfo.uid = pIter->sIter.bData.uid ? pIter->sIter.bData.uid : pIter->sIter.bData.aUid[pIter->sIter.iRow]; + pIter->rowInfo.row = tsdbRowFromBlockData(&pIter->sIter.bData, pIter->sIter.iRow); + pIter->sIter.iRow++; + goto _exit; + } - code = tsdbReadDataBlockEx(pReader->pDataFReader, &dataBlk, &pIter->bData); - if (code) goto _err; + for (;;) { + if (pIter->sIter.iSttBlk < taosArrayGetSize(pIter->sIter.aSttBlk)) { + SSttBlk* pSttBlk = taosArrayGet(pIter->sIter.aSttBlk, pIter->sIter.iSttBlk); - pIter->iRow = -1; - goto _find_row; + if (pFilterInfo) { + if (pFilterInfo->flag & TSDB_FILTER_FLAG_BY_VERSION) { + if (pFilterInfo->sver > pSttBlk->maxVer || pFilterInfo->ever < pSttBlk->minVer) { + pIter->sIter.iSttBlk++; + continue; + } } - - pIter->iBlockIdx++; - if (pIter->iBlockIdx >= taosArrayGetSize(pIter->aBlockIdx)) break; - - pIter->pBlockIdx = (SBlockIdx*)taosArrayGet(pIter->aBlockIdx, pIter->iBlockIdx); - code = tsdbReadDataBlk(pReader->pDataFReader, pIter->pBlockIdx, &pIter->mBlock); - if (code) goto _err; - pIter->iBlock = -1; } - pReader->pIter = NULL; + code = tsdbReadSttBlockEx(pIter->sIter.pReader, pIter->sIter.iStt, pSttBlk, &pIter->sIter.bData); + TSDB_CHECK_CODE(code, lino, _exit); + + pIter->sIter.iRow = 0; + pIter->sIter.iSttBlk++; break; - } else if (pIter->type == SNAP_STT_FILE_ITER) { - for (pIter->iSttBlk++; pIter->iSttBlk < taosArrayGetSize(pIter->aSttBlk); pIter->iSttBlk++) { - SSttBlk* pSttBlk = (SSttBlk*)taosArrayGet(pIter->aSttBlk, pIter->iSttBlk); + } else { + pIter->rowInfo = (SRowInfo){0}; + goto _exit; + } + } + } + +_exit: + if (code) { + tsdbError("%s failed at line %d since %s", __func__, lino, tstrerror(code)); + } + return code; +} - if (pSttBlk->minVer > pReader->ever || pSttBlk->maxVer < pReader->sver) continue; +static int32_t tsdbTombFileDataIterNext(STsdbDataIter2* pIter, STsdbFilterInfo* pFilterInfo) { + int32_t code = 0; + int32_t lino = 0; - code = tsdbReadSttBlockEx(pReader->pDataFReader, pIter->iStt, pSttBlk, &pIter->bData); - if (code) goto _err; + for (;;) { + while (pIter->tIter.iDelData < taosArrayGetSize(pIter->tIter.aDelData)) { + SDelData* pDelData = taosArrayGet(pIter->tIter.aDelData, pIter->tIter.iDelData); - pIter->iRow = -1; - goto _find_row; + if (pFilterInfo) { + if (pFilterInfo->flag & TSDB_FILTER_FLAG_BY_VERSION) { + if (pFilterInfo->sver > pDelData->version || pFilterInfo->ever < pDelData->version) { + pIter->tIter.iDelData++; + continue; + } } + } - pReader->pIter = NULL; + pIter->delInfo.delData = *pDelData; + pIter->tIter.iDelData++; + goto _exit; + } + + for (;;) { + if (pIter->tIter.iDelIdx < taosArrayGetSize(pIter->tIter.aDelIdx)) { + SDelIdx* pDelIdx = taosArrayGet(pIter->tIter.aDelIdx, pIter->tIter.iDelIdx); + + code = tsdbReadDelData(pIter->tIter.pReader, pDelIdx, pIter->tIter.aDelData); + TSDB_CHECK_CODE(code, lino, _exit); + + pIter->delInfo.suid = pDelIdx->suid; + pIter->delInfo.uid = pDelIdx->uid; + pIter->tIter.iDelData = 0; + pIter->tIter.iDelIdx++; break; } else { - ASSERT(0); + pIter->delInfo = (SDelInfo){0}; + goto _exit; } } + } - _out: - pIter = (SFDataIter*)tRBTreeMin(&pReader->rbt); - if (pReader->pIter && pIter) { - int32_t c = tRowInfoCmprFn(&pReader->pIter->rInfo, &pIter->rInfo); - if (c > 0) { - tRBTreePut(&pReader->rbt, (SRBTreeNode*)pReader->pIter); - pReader->pIter = NULL; - } else { - ASSERT(c); - } +_exit: + if (code) { + tsdbError("%s failed at line %d since %s", __func__, lino, tstrerror(code)); + } + return code; +} + +static int32_t tsdbDataIterNext2(STsdbDataIter2* pIter, STsdbFilterInfo* pFilterInfo) { + int32_t code = 0; + + if (pIter->type == TSDB_MEM_TABLE_DATA_ITER) { + ASSERT(0); + return code; + } else if (pIter->type == TSDB_DATA_FILE_DATA_ITER) { + return tsdbDataFileDataIterNext(pIter, pFilterInfo); + } else if (pIter->type == TSDB_STT_FILE_DATA_ITER) { + return tsdbSttFileDataIterNext(pIter, pFilterInfo); + } else if (pIter->type == TSDB_TOMB_FILE_DATA_ITER) { + return tsdbTombFileDataIterNext(pIter, pFilterInfo); + } else { + ASSERT(0); + return code; + } +} + +/* get */ + +// STsdbSnapReader ======================================== +struct STsdbSnapReader { + STsdb* pTsdb; + int64_t sver; + int64_t ever; + int8_t type; + uint8_t* aBuf[5]; + + STsdbFS fs; + TABLEID tbid; + SSkmInfo skmTable; + + // timeseries data + int8_t dataDone; + int32_t fid; + + SDataFReader* pDataFReader; + STsdbDataIter2* iterList; + STsdbDataIter2* pIter; + SRBTree rbt; + SBlockData bData; + + // tombstone data + int8_t delDone; + SDelFReader* pDelFReader; + STsdbDataIter2* pTIter; + SArray* aDelData; +}; + +static int32_t tsdbSnapReadFileDataStart(STsdbSnapReader* pReader) { + int32_t code = 0; + int32_t lino = 0; + + SDFileSet* pSet = taosArraySearch(pReader->fs.aDFileSet, &(SDFileSet){.fid = pReader->fid}, tDFileSetCmprFn, TD_GT); + if (pSet == NULL) { + pReader->fid = INT32_MAX; + goto _exit; + } + + pReader->fid = pSet->fid; + + tRBTreeCreate(&pReader->rbt, tsdbDataIterCmprFn); + + code = tsdbDataFReaderOpen(&pReader->pDataFReader, pReader->pTsdb, pSet); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbOpenDataFileDataIter(pReader->pDataFReader, &pReader->pIter); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pReader->pIter) { + // iter to next with filter info (sver, ever) + code = tsdbDataIterNext2(pReader->pIter, + &(STsdbFilterInfo){.flag = TSDB_FILTER_FLAG_BY_VERSION, // flag + .sver = pReader->sver, + .ever = pReader->ever}); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pReader->pIter->rowInfo.suid || pReader->pIter->rowInfo.uid) { + // add to rbtree + tRBTreePut(&pReader->rbt, &pReader->pIter->rbtn); + + // add to iterList + pReader->pIter->next = pReader->iterList; + pReader->iterList = pReader->pIter; + } else { + tsdbCloseDataIter2(pReader->pIter); } } - if (pReader->pIter == NULL) { - pReader->pIter = (SFDataIter*)tRBTreeMin(&pReader->rbt); + for (int32_t iStt = 0; iStt < pSet->nSttF; ++iStt) { + code = tsdbOpenSttFileDataIter(pReader->pDataFReader, iStt, &pReader->pIter); + TSDB_CHECK_CODE(code, lino, _exit); + if (pReader->pIter) { - tRBTreeDrop(&pReader->rbt, (SRBTreeNode*)pReader->pIter); + // iter to valid row + code = tsdbDataIterNext2(pReader->pIter, + &(STsdbFilterInfo){.flag = TSDB_FILTER_FLAG_BY_VERSION, // flag + .sver = pReader->sver, + .ever = pReader->ever}); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pReader->pIter->rowInfo.suid || pReader->pIter->rowInfo.uid) { + // add to rbtree + tRBTreePut(&pReader->rbt, &pReader->pIter->rbtn); + + // add to iterList + pReader->pIter->next = pReader->iterList; + pReader->iterList = pReader->pIter; + } else { + tsdbCloseDataIter2(pReader->pIter); + } } } - return code; + pReader->pIter = NULL; -_err: +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbInfo("vgId:%d %s done, fid:%d", TD_VID(pReader->pTsdb->pVnode), __func__, pReader->fid); + } return code; } -static SRowInfo* tsdbSnapGetRow(STsdbSnapReader* pReader) { +static void tsdbSnapReadFileDataEnd(STsdbSnapReader* pReader) { + while (pReader->iterList) { + STsdbDataIter2* pIter = pReader->iterList; + pReader->iterList = pIter->next; + tsdbCloseDataIter2(pIter); + } + + tsdbDataFReaderClose(&pReader->pDataFReader); +} + +static int32_t tsdbSnapReadNextRow(STsdbSnapReader* pReader, SRowInfo** ppRowInfo) { + int32_t code = 0; + int32_t lino = 0; + if (pReader->pIter) { - return &pReader->pIter->rInfo; - } else { - tsdbSnapNextRow(pReader); + code = tsdbDataIterNext2(pReader->pIter, &(STsdbFilterInfo){.flag = TSDB_FILTER_FLAG_BY_VERSION, // flag + .sver = pReader->sver, + .ever = pReader->ever}); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pReader->pIter->rowInfo.suid == 0 && pReader->pIter->rowInfo.uid == 0) { + pReader->pIter = NULL; + } else { + SRBTreeNode* pNode = tRBTreeMin(&pReader->rbt); + if (pNode) { + int32_t c = tsdbDataIterCmprFn(&pReader->pIter->rbtn, pNode); + if (c > 0) { + tRBTreePut(&pReader->rbt, &pReader->pIter->rbtn); + pReader->pIter = NULL; + } else if (c == 0) { + ASSERT(0); + } + } + } + } + if (pReader->pIter == NULL) { + SRBTreeNode* pNode = tRBTreeMin(&pReader->rbt); + if (pNode) { + tRBTreeDrop(&pReader->rbt, pNode); + pReader->pIter = TSDB_RBTN_TO_DATA_ITER(pNode); + } + } + + if (ppRowInfo) { if (pReader->pIter) { - return &pReader->pIter->rInfo; + *ppRowInfo = &pReader->pIter->rowInfo; } else { - return NULL; + *ppRowInfo = NULL; } } + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } + return code; +} + +static int32_t tsdbSnapReadGetRow(STsdbSnapReader* pReader, SRowInfo** ppRowInfo) { + if (pReader->pIter) { + *ppRowInfo = &pReader->pIter->rowInfo; + return 0; + } + + return tsdbSnapReadNextRow(pReader, ppRowInfo); } static int32_t tsdbSnapCmprData(STsdbSnapReader* pReader, uint8_t** ppData) { @@ -318,155 +672,215 @@ _exit: return code; } -static int32_t tsdbSnapReadData(STsdbSnapReader* pReader, uint8_t** ppData) { +static int32_t tsdbSnapReadTimeSeriesData(STsdbSnapReader* pReader, uint8_t** ppData) { int32_t code = 0; int32_t lino = 0; STsdb* pTsdb = pReader->pTsdb; - while (true) { + tBlockDataReset(&pReader->bData); + + for (;;) { + // start a new file read if need if (pReader->pDataFReader == NULL) { - code = tsdbSnapReadOpenFile(pReader); + code = tsdbSnapReadFileDataStart(pReader); TSDB_CHECK_CODE(code, lino, _exit); } if (pReader->pDataFReader == NULL) break; - SRowInfo* pRowInfo = tsdbSnapGetRow(pReader); + SRowInfo* pRowInfo; + code = tsdbSnapReadGetRow(pReader, &pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); + if (pRowInfo == NULL) { - tsdbDataFReaderClose(&pReader->pDataFReader); + tsdbSnapReadFileDataEnd(pReader); continue; } - TABLEID id = {.suid = pRowInfo->suid, .uid = pRowInfo->uid}; - SBlockData* pBlockData = &pReader->bData; - - code = tsdbUpdateTableSchema(pTsdb->pVnode->pMeta, id.suid, id.uid, &pReader->skmTable); + code = tsdbUpdateTableSchema(pTsdb->pVnode->pMeta, pRowInfo->suid, pRowInfo->uid, &pReader->skmTable); TSDB_CHECK_CODE(code, lino, _exit); - code = tBlockDataInit(pBlockData, &id, pReader->skmTable.pTSchema, NULL, 0); + code = tBlockDataInit(&pReader->bData, (TABLEID*)pRowInfo, pReader->skmTable.pTSchema, NULL, 0); TSDB_CHECK_CODE(code, lino, _exit); - while (pRowInfo->suid == id.suid && pRowInfo->uid == id.uid) { - code = tBlockDataAppendRow(pBlockData, &pRowInfo->row, NULL, pRowInfo->uid); + do { + if (!TABLE_SAME_SCHEMA(pReader->bData.suid, pReader->bData.uid, pRowInfo->suid, pRowInfo->uid)) break; + + if (pReader->bData.uid && pReader->bData.uid != pRowInfo->uid) { + code = tRealloc((uint8_t**)&pReader->bData.aUid, sizeof(int64_t) * (pReader->bData.nRow + 1)); + TSDB_CHECK_CODE(code, lino, _exit); + + for (int32_t iRow = 0; iRow < pReader->bData.nRow; ++iRow) { + pReader->bData.aUid[iRow] = pReader->bData.uid; + } + pReader->bData.uid = 0; + } + + code = tBlockDataAppendRow(&pReader->bData, &pRowInfo->row, NULL, pRowInfo->uid); TSDB_CHECK_CODE(code, lino, _exit); - code = tsdbSnapNextRow(pReader); + code = tsdbSnapReadNextRow(pReader, &pRowInfo); TSDB_CHECK_CODE(code, lino, _exit); - pRowInfo = tsdbSnapGetRow(pReader); - if (pRowInfo == NULL) { - tsdbDataFReaderClose(&pReader->pDataFReader); - break; - } + if (pReader->bData.nRow >= 4096) break; + } while (pRowInfo); + + ASSERT(pReader->bData.nRow > 0); + + break; + } + + if (pReader->bData.nRow > 0) { + ASSERT(pReader->bData.suid || pReader->bData.uid); + + code = tsdbSnapCmprData(pReader, ppData); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); + } + return code; +} + +static int32_t tsdbSnapCmprTombData(STsdbSnapReader* pReader, uint8_t** ppData) { + int32_t code = 0; + int32_t lino = 0; + + int64_t size = sizeof(TABLEID); + for (int32_t iDelData = 0; iDelData < taosArrayGetSize(pReader->aDelData); ++iDelData) { + size += tPutDelData(NULL, taosArrayGet(pReader->aDelData, iDelData)); + } + + uint8_t* pData = (uint8_t*)taosMemoryMalloc(sizeof(SSnapDataHdr) + size); + if (pData == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + SSnapDataHdr* pHdr = (SSnapDataHdr*)pData; + pHdr->type = SNAP_DATA_DEL; + pHdr->size = size; + + TABLEID* pId = (TABLEID*)(pData + sizeof(SSnapDataHdr)); + *pId = pReader->tbid; + + size = sizeof(SSnapDataHdr) + sizeof(TABLEID); + for (int32_t iDelData = 0; iDelData < taosArrayGetSize(pReader->aDelData); ++iDelData) { + size += tPutDelData(pData + size, taosArrayGet(pReader->aDelData, iDelData)); + } - if (pBlockData->nRow >= 4096) break; +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code)); + if (pData) { + taosMemoryFree(pData); + pData = NULL; } + } + *ppData = pData; + return code; +} - code = tsdbSnapCmprData(pReader, ppData); - TSDB_CHECK_CODE(code, lino, _exit); +static void tsdbSnapReadGetTombData(STsdbSnapReader* pReader, SDelInfo** ppDelInfo) { + if (pReader->pTIter == NULL || (pReader->pTIter->delInfo.suid == 0 && pReader->pTIter->delInfo.uid == 0)) { + *ppDelInfo = NULL; + } else { + *ppDelInfo = &pReader->pTIter->delInfo; + } +} - break; +static int32_t tsdbSnapReadNextTombData(STsdbSnapReader* pReader, SDelInfo** ppDelInfo) { + int32_t code = 0; + int32_t lino = 0; + + code = tsdbDataIterNext2( + pReader->pTIter, + &(STsdbFilterInfo){.flag = TSDB_FILTER_FLAG_BY_VERSION, .sver = pReader->sver, .ever = pReader->ever}); + TSDB_CHECK_CODE(code, lino, _exit); + + if (ppDelInfo) { + tsdbSnapReadGetTombData(pReader, ppDelInfo); } _exit: if (code) { - tsdbError("vgId:%d, %s failed since %s, path:%s", TD_VID(pTsdb->pVnode), __func__, tstrerror(code), pTsdb->path); + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code)); } return code; } -static int32_t tsdbSnapReadDel(STsdbSnapReader* pReader, uint8_t** ppData) { +static int32_t tsdbSnapReadTombData(STsdbSnapReader* pReader, uint8_t** ppData) { int32_t code = 0; int32_t lino = 0; - STsdb* pTsdb = pReader->pTsdb; - SDelFile* pDelFile = pReader->fs.pDelFile; + STsdb* pTsdb = pReader->pTsdb; + // open tombstone data iter if need if (pReader->pDelFReader == NULL) { - if (pDelFile == NULL) { - goto _exit; - } + if (pReader->fs.pDelFile == NULL) goto _exit; // open - code = tsdbDelFReaderOpen(&pReader->pDelFReader, pDelFile, pTsdb); + code = tsdbDelFReaderOpen(&pReader->pDelFReader, pReader->fs.pDelFile, pTsdb); TSDB_CHECK_CODE(code, lino, _exit); - // read index - code = tsdbReadDelIdx(pReader->pDelFReader, pReader->aDelIdx); + code = tsdbOpenTombFileDataIter(pReader->pDelFReader, &pReader->pTIter); TSDB_CHECK_CODE(code, lino, _exit); - pReader->iDelIdx = 0; + if (pReader->pTIter) { + code = tsdbSnapReadNextTombData(pReader, NULL); + TSDB_CHECK_CODE(code, lino, _exit); + } } - while (true) { - if (pReader->iDelIdx >= taosArrayGetSize(pReader->aDelIdx)) { - tsdbDelFReaderClose(&pReader->pDelFReader); - break; - } + // loop to get tombstone data + SDelInfo* pDelInfo; + tsdbSnapReadGetTombData(pReader, &pDelInfo); - SDelIdx* pDelIdx = (SDelIdx*)taosArrayGet(pReader->aDelIdx, pReader->iDelIdx); + if (pDelInfo == NULL) goto _exit; - pReader->iDelIdx++; + pReader->tbid = *(TABLEID*)pDelInfo; - code = tsdbReadDelData(pReader->pDelFReader, pDelIdx, pReader->aDelData); + if (pReader->aDelData) { + taosArrayClear(pReader->aDelData); + } else if ((pReader->aDelData = taosArrayInit(16, sizeof(SDelData))) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; TSDB_CHECK_CODE(code, lino, _exit); + } - int32_t size = 0; - for (int32_t iDelData = 0; iDelData < taosArrayGetSize(pReader->aDelData); iDelData++) { - SDelData* pDelData = (SDelData*)taosArrayGet(pReader->aDelData, iDelData); - - if (pDelData->version >= pReader->sver && pDelData->version <= pReader->ever) { - size += tPutDelData(NULL, pDelData); - } - } - if (size == 0) continue; - - // org data - size = sizeof(TABLEID) + size; - *ppData = taosMemoryMalloc(sizeof(SSnapDataHdr) + size); - if (*ppData == NULL) { + while (pDelInfo && pDelInfo->suid == pReader->tbid.suid && pDelInfo->uid == pReader->tbid.uid) { + if (taosArrayPush(pReader->aDelData, &pDelInfo->delData) < 0) { code = TSDB_CODE_OUT_OF_MEMORY; TSDB_CHECK_CODE(code, lino, _exit); } - SSnapDataHdr* pHdr = (SSnapDataHdr*)(*ppData); - pHdr->type = SNAP_DATA_DEL; - pHdr->size = size; - - TABLEID* pId = (TABLEID*)(&pHdr[1]); - pId->suid = pDelIdx->suid; - pId->uid = pDelIdx->uid; - int32_t n = sizeof(SSnapDataHdr) + sizeof(TABLEID); - for (int32_t iDelData = 0; iDelData < taosArrayGetSize(pReader->aDelData); iDelData++) { - SDelData* pDelData = (SDelData*)taosArrayGet(pReader->aDelData, iDelData); - - if (pDelData->version < pReader->sver) continue; - if (pDelData->version > pReader->ever) continue; - - n += tPutDelData((*ppData) + n, pDelData); - } - - tsdbInfo("vgId:%d, vnode snapshot tsdb read del data for %s, suid:%" PRId64 " uid:%" PRId64 " size:%d", - TD_VID(pTsdb->pVnode), pTsdb->path, pDelIdx->suid, pDelIdx->uid, size); + code = tsdbSnapReadNextTombData(pReader, &pDelInfo); + TSDB_CHECK_CODE(code, lino, _exit); + } - break; + // encode tombstone data + if (taosArrayGetSize(pReader->aDelData) > 0) { + code = tsdbSnapCmprTombData(pReader, ppData); + TSDB_CHECK_CODE(code, lino, _exit); } _exit: if (code) { - tsdbError("vgId:%d, %s failed since %s, path:%s", TD_VID(pTsdb->pVnode), __func__, tstrerror(code), pTsdb->path); + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbDebug("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); } return code; } int32_t tsdbSnapReaderOpen(STsdb* pTsdb, int64_t sver, int64_t ever, int8_t type, STsdbSnapReader** ppReader) { - int32_t code = 0; - int32_t lino = 0; - STsdbSnapReader* pReader = NULL; + int32_t code = 0; + int32_t lino = 0; // alloc - pReader = (STsdbSnapReader*)taosMemoryCalloc(1, sizeof(*pReader)); + STsdbSnapReader* pReader = (STsdbSnapReader*)taosMemoryCalloc(1, sizeof(*pReader)); if (pReader == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; TSDB_CHECK_CODE(code, lino, _exit); @@ -476,118 +890,80 @@ int32_t tsdbSnapReaderOpen(STsdb* pTsdb, int64_t sver, int64_t ever, int8_t type pReader->ever = ever; pReader->type = type; - code = taosThreadRwlockRdlock(&pTsdb->rwLock); - if (code) { - code = TAOS_SYSTEM_ERROR(code); - TSDB_CHECK_CODE(code, lino, _exit); - } - + taosThreadRwlockRdlock(&pTsdb->rwLock); code = tsdbFSRef(pTsdb, &pReader->fs); if (code) { taosThreadRwlockUnlock(&pTsdb->rwLock); TSDB_CHECK_CODE(code, lino, _exit); } + taosThreadRwlockUnlock(&pTsdb->rwLock); - code = taosThreadRwlockUnlock(&pTsdb->rwLock); - if (code) { - code = TAOS_SYSTEM_ERROR(code); - TSDB_CHECK_CODE(code, lino, _exit); - } - - // data + // init pReader->fid = INT32_MIN; - for (int32_t iIter = 0; iIter < sizeof(pReader->aFDataIter) / sizeof(pReader->aFDataIter[0]); iIter++) { - SFDataIter* pIter = &pReader->aFDataIter[iIter]; - - if (iIter == 0) { - pIter->aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx)); - if (pIter->aBlockIdx == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - } else { - pIter->aSttBlk = taosArrayInit(0, sizeof(SSttBlk)); - if (pIter->aSttBlk == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - } - - code = tBlockDataCreate(&pIter->bData); - TSDB_CHECK_CODE(code, lino, _exit); - } code = tBlockDataCreate(&pReader->bData); TSDB_CHECK_CODE(code, lino, _exit); - // del - pReader->aDelIdx = taosArrayInit(0, sizeof(SDelIdx)); - if (pReader->aDelIdx == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - pReader->aDelData = taosArrayInit(0, sizeof(SDelData)); - if (pReader->aDelData == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - _exit: if (code) { - tsdbError("vgId:%d, %s failed at line %d since %s, TSDB path: %s", TD_VID(pTsdb->pVnode), __func__, lino, - tstrerror(code), pTsdb->path); - *ppReader = NULL; - + tsdbError("vgId:%d %s failed at line %d since %s, sver:%" PRId64 " ever:%" PRId64 " type:%d", TD_VID(pTsdb->pVnode), + __func__, lino, tstrerror(code), sver, ever, type); if (pReader) { - taosArrayDestroy(pReader->aDelData); - taosArrayDestroy(pReader->aDelIdx); tBlockDataDestroy(&pReader->bData); - tsdbFSDestroy(&pReader->fs); + tsdbFSUnref(pTsdb, &pReader->fs); taosMemoryFree(pReader); + pReader = NULL; } } else { - *ppReader = pReader; - tsdbInfo("vgId:%d, vnode snapshot tsdb reader opened for %s", TD_VID(pTsdb->pVnode), pTsdb->path); + tsdbInfo("vgId:%d %s done, sver:%" PRId64 " ever:%" PRId64 " type:%d", TD_VID(pTsdb->pVnode), __func__, sver, ever, + type); } + *ppReader = pReader; return code; } int32_t tsdbSnapReaderClose(STsdbSnapReader** ppReader) { - int32_t code = 0; - STsdbSnapReader* pReader = *ppReader; - - // data - if (pReader->pDataFReader) tsdbDataFReaderClose(&pReader->pDataFReader); - for (int32_t iIter = 0; iIter < sizeof(pReader->aFDataIter) / sizeof(pReader->aFDataIter[0]); iIter++) { - SFDataIter* pIter = &pReader->aFDataIter[iIter]; + int32_t code = 0; + int32_t lino = 0; - if (iIter == 0) { - taosArrayDestroy(pIter->aBlockIdx); - tMapDataClear(&pIter->mBlock); - } else { - taosArrayDestroy(pIter->aSttBlk); - } + STsdbSnapReader* pReader = *ppReader; + STsdb* pTsdb = pReader->pTsdb; - tBlockDataDestroy(&pIter->bData); + // tombstone + if (pReader->pTIter) { + tsdbCloseDataIter2(pReader->pTIter); + pReader->pTIter = NULL; + } + if (pReader->pDelFReader) { + tsdbDelFReaderClose(&pReader->pDelFReader); } + taosArrayDestroy(pReader->aDelData); + // timeseries + while (pReader->iterList) { + STsdbDataIter2* pIter = pReader->iterList; + pReader->iterList = pIter->next; + tsdbCloseDataIter2(pIter); + } + if (pReader->pDataFReader) { + tsdbDataFReaderClose(&pReader->pDataFReader); + } tBlockDataDestroy(&pReader->bData); - tDestroyTSchema(pReader->skmTable.pTSchema); - - // del - if (pReader->pDelFReader) tsdbDelFReaderClose(&pReader->pDelFReader); - taosArrayDestroy(pReader->aDelIdx); - taosArrayDestroy(pReader->aDelData); + // other + tDestroyTSchema(pReader->skmTable.pTSchema); tsdbFSUnref(pReader->pTsdb, &pReader->fs); - - tsdbInfo("vgId:%d, vnode snapshot tsdb reader closed for %s", TD_VID(pReader->pTsdb->pVnode), pReader->pTsdb->path); - for (int32_t iBuf = 0; iBuf < sizeof(pReader->aBuf) / sizeof(pReader->aBuf[0]); iBuf++) { tFree(pReader->aBuf[iBuf]); } - taosMemoryFree(pReader); + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbDebug("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); + } *ppReader = NULL; return code; } @@ -600,7 +976,7 @@ int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData) { // read data file if (!pReader->dataDone) { - code = tsdbSnapReadData(pReader, ppData); + code = tsdbSnapReadTimeSeriesData(pReader, ppData); TSDB_CHECK_CODE(code, lino, _exit); if (*ppData) { goto _exit; @@ -611,7 +987,7 @@ int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData) { // read del file if (!pReader->delDone) { - code = tsdbSnapReadDel(pReader, ppData); + code = tsdbSnapReadTombData(pReader, ppData); TSDB_CHECK_CODE(code, lino, _exit); if (*ppData) { goto _exit; @@ -622,22 +998,18 @@ int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData) { _exit: if (code) { - tsdbError("vgId:%d, %s failed since %s, path:%s", TD_VID(pReader->pTsdb->pVnode), __func__, tstrerror(code), - pReader->pTsdb->path); + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code)); } else { - tsdbDebug("vgId:%d, %s done, path:%s", TD_VID(pReader->pTsdb->pVnode), __func__, pReader->pTsdb->path); + tsdbDebug("vgId:%d %s done", TD_VID(pReader->pTsdb->pVnode), __func__); } return code; } // STsdbSnapWriter ======================================== struct STsdbSnapWriter { - STsdb* pTsdb; - int64_t sver; - int64_t ever; - STsdbFS fs; - - // config + STsdb* pTsdb; + int64_t sver; + int64_t ever; int32_t minutes; int8_t precision; int32_t minRow; @@ -646,641 +1018,816 @@ struct STsdbSnapWriter { int64_t commitID; uint8_t* aBuf[5]; - // for data file - SBlockData bData; - int32_t fid; - TABLEID id; - SSkmInfo skmTable; - struct { - SDataFReader* pReader; - SArray* aBlockIdx; - int32_t iBlockIdx; - SBlockIdx* pBlockIdx; - SMapData mDataBlk; - int32_t iDataBlk; - SBlockData bData; - int32_t iRow; - } dReader; - struct { - SDataFWriter* pWriter; - SArray* aBlockIdx; - SMapData mDataBlk; - SArray* aSttBlk; - SBlockData bData; - SBlockData sData; - } dWriter; - - // for del file - SDelFReader* pDelFReader; + STsdbFS fs; + TABLEID tbid; + + // time-series data + SBlockData inData; + + int32_t fid; + SSkmInfo skmTable; + + /* reader */ + SDataFReader* pDataFReader; + STsdbDataIter2* iterList; + STsdbDataIter2* pDIter; + STsdbDataIter2* pSIter; + SRBTree rbt; // SRBTree + + /* writer */ + SDataFWriter* pDataFWriter; + SArray* aBlockIdx; + SMapData mDataBlk; // SMapData + SArray* aSttBlk; // SArray + SBlockData bData; + SBlockData sData; + + // tombstone data + /* reader */ + SDelFReader* pDelFReader; + STsdbDataIter2* pTIter; + + /* writer */ SDelFWriter* pDelFWriter; - int32_t iDelIdx; - SArray* aDelIdxR; + SArray* aDelIdx; SArray* aDelData; - SArray* aDelIdxW; }; // SNAP_DATA_TSDB -extern int32_t tsdbWriteDataBlock(SDataFWriter* pWriter, SBlockData* pBlockData, SMapData* mDataBlk, int8_t cmprAlg); -extern int32_t tsdbWriteSttBlock(SDataFWriter* pWriter, SBlockData* pBlockData, SArray* aSttBlk, int8_t cmprAlg); - -static int32_t tsdbSnapNextTableData(STsdbSnapWriter* pWriter) { +static int32_t tsdbSnapWriteTableDataStart(STsdbSnapWriter* pWriter, TABLEID* pId) { int32_t code = 0; + int32_t lino = 0; + + if (pId) { + pWriter->tbid = *pId; + } else { + pWriter->tbid = (TABLEID){INT64_MAX, INT64_MAX}; + } + + if (pWriter->pDIter) { + STsdbDataIter2* pIter = pWriter->pDIter; + + // assert last table data end + ASSERT(pIter->dIter.iRow >= pIter->dIter.bData.nRow); + ASSERT(pIter->dIter.iDataBlk >= pIter->dIter.mDataBlk.nItem); + + for (;;) { + if (pIter->dIter.iBlockIdx >= taosArrayGetSize(pIter->dIter.aBlockIdx)) { + pWriter->pDIter = NULL; + break; + } + + SBlockIdx* pBlockIdx = (SBlockIdx*)taosArrayGet(pIter->dIter.aBlockIdx, pIter->dIter.iBlockIdx); + + int32_t c = tTABLEIDCmprFn(pBlockIdx, &pWriter->tbid); + if (c < 0) { + code = tsdbReadDataBlk(pIter->dIter.pReader, pBlockIdx, &pIter->dIter.mDataBlk); + TSDB_CHECK_CODE(code, lino, _exit); + + SBlockIdx* pNewBlockIdx = taosArrayReserve(pWriter->aBlockIdx, 1); + if (pNewBlockIdx == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + pNewBlockIdx->suid = pBlockIdx->suid; + pNewBlockIdx->uid = pBlockIdx->uid; + + code = tsdbWriteDataBlk(pWriter->pDataFWriter, &pIter->dIter.mDataBlk, pNewBlockIdx); + TSDB_CHECK_CODE(code, lino, _exit); + + pIter->dIter.iBlockIdx++; + } else if (c == 0) { + code = tsdbReadDataBlk(pIter->dIter.pReader, pBlockIdx, &pIter->dIter.mDataBlk); + TSDB_CHECK_CODE(code, lino, _exit); + + pIter->dIter.iDataBlk = 0; + pIter->dIter.iBlockIdx++; + + break; + } else { + pIter->dIter.iDataBlk = pIter->dIter.mDataBlk.nItem; + break; + } + } + } + + if (pId) { + code = tsdbUpdateTableSchema(pWriter->pTsdb->pVnode->pMeta, pId->suid, pId->uid, &pWriter->skmTable); + TSDB_CHECK_CODE(code, lino, _exit); + + tMapDataReset(&pWriter->mDataBlk); - ASSERT(pWriter->dReader.iRow >= pWriter->dReader.bData.nRow); + code = tBlockDataInit(&pWriter->bData, pId, pWriter->skmTable.pTSchema, NULL, 0); + TSDB_CHECK_CODE(code, lino, _exit); + } - if (pWriter->dReader.iBlockIdx < taosArrayGetSize(pWriter->dReader.aBlockIdx)) { - pWriter->dReader.pBlockIdx = (SBlockIdx*)taosArrayGet(pWriter->dReader.aBlockIdx, pWriter->dReader.iBlockIdx); + if (!TABLE_SAME_SCHEMA(pWriter->tbid.suid, pWriter->tbid.uid, pWriter->sData.suid, pWriter->sData.uid)) { + if ((pWriter->sData.nRow > 0)) { + code = tsdbWriteSttBlock(pWriter->pDataFWriter, &pWriter->sData, pWriter->aSttBlk, pWriter->cmprAlg); + TSDB_CHECK_CODE(code, lino, _exit); + } - code = tsdbReadDataBlk(pWriter->dReader.pReader, pWriter->dReader.pBlockIdx, &pWriter->dReader.mDataBlk); - if (code) goto _exit; + if (pId) { + TABLEID id = {.suid = pWriter->tbid.suid, .uid = pWriter->tbid.suid ? 0 : pWriter->tbid.uid}; + code = tBlockDataInit(&pWriter->sData, &id, pWriter->skmTable.pTSchema, NULL, 0); + TSDB_CHECK_CODE(code, lino, _exit); + } + } - pWriter->dReader.iBlockIdx++; +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); } else { - pWriter->dReader.pBlockIdx = NULL; - tMapDataReset(&pWriter->dReader.mDataBlk); + tsdbTrace("vgId:%d %s done, suid:%" PRId64 " uid:%" PRId64, TD_VID(pWriter->pTsdb->pVnode), __func__, + pWriter->tbid.suid, pWriter->tbid.uid); + } + return code; +} + +static int32_t tsdbSnapWriteTableRowImpl(STsdbSnapWriter* pWriter, TSDBROW* pRow) { + int32_t code = 0; + int32_t lino = 0; + + code = tBlockDataAppendRow(&pWriter->bData, pRow, pWriter->skmTable.pTSchema, pWriter->tbid.uid); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pWriter->bData.nRow >= pWriter->maxRow) { + code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg); + TSDB_CHECK_CODE(code, lino, _exit); } - pWriter->dReader.iDataBlk = 0; // point to the next one - tBlockDataReset(&pWriter->dReader.bData); - pWriter->dReader.iRow = 0; _exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } return code; } -static int32_t tsdbSnapWriteCopyData(STsdbSnapWriter* pWriter, TABLEID* pId) { +static int32_t tsdbSnapWriteTableRow(STsdbSnapWriter* pWriter, TSDBROW* pRow) { int32_t code = 0; + int32_t lino = 0; - while (true) { - if (pWriter->dReader.pBlockIdx == NULL) break; - if (tTABLEIDCmprFn(pWriter->dReader.pBlockIdx, pId) >= 0) break; + TSDBKEY inKey = pRow ? TSDBROW_KEY(pRow) : TSDBKEY_MAX; + + if (pWriter->pDIter == NULL || (pWriter->pDIter->dIter.iRow >= pWriter->pDIter->dIter.bData.nRow && + pWriter->pDIter->dIter.iDataBlk >= pWriter->pDIter->dIter.mDataBlk.nItem)) { + goto _write_row; + } else { + for (;;) { + while (pWriter->pDIter->dIter.iRow < pWriter->pDIter->dIter.bData.nRow) { + TSDBROW row = tsdbRowFromBlockData(&pWriter->pDIter->dIter.bData, pWriter->pDIter->dIter.iRow); + + int32_t c = tsdbKeyCmprFn(&inKey, &TSDBROW_KEY(&row)); + if (c < 0) { + goto _write_row; + } else if (c > 0) { + code = tsdbSnapWriteTableRowImpl(pWriter, &row); + TSDB_CHECK_CODE(code, lino, _exit); + + pWriter->pDIter->dIter.iRow++; + } else { + ASSERT(0); + } + } - SBlockIdx blkIdx = *pWriter->dReader.pBlockIdx; - code = tsdbWriteDataBlk(pWriter->dWriter.pWriter, &pWriter->dReader.mDataBlk, &blkIdx); - if (code) goto _exit; + for (;;) { + if (pWriter->pDIter->dIter.iDataBlk >= pWriter->pDIter->dIter.mDataBlk.nItem) goto _write_row; - if (taosArrayPush(pWriter->dWriter.aBlockIdx, &blkIdx) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _exit; + // FIXME: Here can be slow, use array instead + SDataBlk dataBlk; + tMapDataGetItemByIdx(&pWriter->pDIter->dIter.mDataBlk, pWriter->pDIter->dIter.iDataBlk, &dataBlk, tGetDataBlk); + + int32_t c = tDataBlkCmprFn(&dataBlk, &(SDataBlk){.minKey = inKey, .maxKey = inKey}); + if (c > 0) { + goto _write_row; + } else if (c < 0) { + if (pWriter->bData.nRow > 0) { + code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg); + TSDB_CHECK_CODE(code, lino, _exit); + } + + tMapDataPutItem(&pWriter->pDIter->dIter.mDataBlk, &dataBlk, tPutDataBlk); + pWriter->pDIter->dIter.iDataBlk++; + } else { + code = tsdbReadDataBlockEx(pWriter->pDataFReader, &dataBlk, &pWriter->pDIter->dIter.bData); + TSDB_CHECK_CODE(code, lino, _exit); + + pWriter->pDIter->dIter.iRow = 0; + pWriter->pDIter->dIter.iDataBlk++; + break; + } + } } + } - code = tsdbSnapNextTableData(pWriter); - if (code) goto _exit; +_write_row: + if (pRow) { + code = tsdbSnapWriteTableRowImpl(pWriter, pRow); + TSDB_CHECK_CODE(code, lino, _exit); } _exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } return code; } -static int32_t tsdbSnapWriteTableDataStart(STsdbSnapWriter* pWriter, TABLEID* pId) { +static int32_t tsdbSnapWriteTableDataEnd(STsdbSnapWriter* pWriter) { int32_t code = 0; + int32_t lino = 0; + + // write a NULL row to end current table data write + code = tsdbSnapWriteTableRow(pWriter, NULL); + TSDB_CHECK_CODE(code, lino, _exit); + + if (pWriter->bData.nRow > 0) { + if (pWriter->bData.nRow < pWriter->minRow) { + ASSERT(TABLE_SAME_SCHEMA(pWriter->sData.suid, pWriter->sData.uid, pWriter->tbid.suid, pWriter->tbid.uid)); + for (int32_t iRow = 0; iRow < pWriter->bData.nRow; iRow++) { + code = + tBlockDataAppendRow(&pWriter->sData, &tsdbRowFromBlockData(&pWriter->bData, iRow), NULL, pWriter->tbid.uid); + TSDB_CHECK_CODE(code, lino, _exit); - code = tsdbSnapWriteCopyData(pWriter, pId); - if (code) goto _err; + if (pWriter->sData.nRow >= pWriter->maxRow) { + code = tsdbWriteSttBlock(pWriter->pDataFWriter, &pWriter->sData, pWriter->aSttBlk, pWriter->cmprAlg); + TSDB_CHECK_CODE(code, lino, _exit); + } + } - pWriter->id.suid = pId->suid; - pWriter->id.uid = pId->uid; + tBlockDataClear(&pWriter->bData); + } else { + code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg); + TSDB_CHECK_CODE(code, lino, _exit); + } + } - code = tsdbUpdateTableSchema(pWriter->pTsdb->pVnode->pMeta, pId->suid, pId->uid, &pWriter->skmTable); - if (code) goto _err; + if (pWriter->mDataBlk.nItem) { + SBlockIdx* pBlockIdx = taosArrayReserve(pWriter->aBlockIdx, 1); + if (pBlockIdx == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } - tMapDataReset(&pWriter->dWriter.mDataBlk); - code = tBlockDataInit(&pWriter->dWriter.bData, pId, pWriter->skmTable.pTSchema, NULL, 0); - if (code) goto _err; + pBlockIdx->suid = pWriter->tbid.suid; + pBlockIdx->uid = pWriter->tbid.uid; - return code; + code = tsdbWriteDataBlk(pWriter->pDataFWriter, &pWriter->mDataBlk, pBlockIdx); + TSDB_CHECK_CODE(code, lino, _exit); + } -_err: - tsdbError("vgId:%d, %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code)); +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } return code; } -static int32_t tsdbSnapWriteTableDataEnd(STsdbSnapWriter* pWriter) { +static int32_t tsdbSnapWriteFileDataStart(STsdbSnapWriter* pWriter, int32_t fid) { int32_t code = 0; + int32_t lino = 0; - if (pWriter->id.suid == 0 && pWriter->id.uid == 0) return code; + ASSERT(pWriter->pDataFWriter == NULL && pWriter->fid < fid); - int32_t c = 1; - if (pWriter->dReader.pBlockIdx) { - c = tTABLEIDCmprFn(pWriter->dReader.pBlockIdx, &pWriter->id); - ASSERT(c >= 0); - } + STsdb* pTsdb = pWriter->pTsdb; + + pWriter->fid = fid; + pWriter->tbid = (TABLEID){0}; + SDFileSet* pSet = taosArraySearch(pWriter->fs.aDFileSet, &(SDFileSet){.fid = fid}, tDFileSetCmprFn, TD_EQ); + + // open reader + pWriter->pDataFReader = NULL; + pWriter->iterList = NULL; + pWriter->pDIter = NULL; + pWriter->pSIter = NULL; + tRBTreeCreate(&pWriter->rbt, tsdbDataIterCmprFn); + if (pSet) { + code = tsdbDataFReaderOpen(&pWriter->pDataFReader, pTsdb, pSet); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbOpenDataFileDataIter(pWriter->pDataFReader, &pWriter->pDIter); + TSDB_CHECK_CODE(code, lino, _exit); + if (pWriter->pDIter) { + pWriter->pDIter->next = pWriter->iterList; + pWriter->iterList = pWriter->pDIter; + } - if (c == 0) { - SBlockData* pBData = &pWriter->dWriter.bData; + for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) { + code = tsdbOpenSttFileDataIter(pWriter->pDataFReader, iStt, &pWriter->pSIter); + TSDB_CHECK_CODE(code, lino, _exit); - for (; pWriter->dReader.iRow < pWriter->dReader.bData.nRow; pWriter->dReader.iRow++) { - TSDBROW row = tsdbRowFromBlockData(&pWriter->dReader.bData, pWriter->dReader.iRow); + if (pWriter->pSIter) { + code = tsdbSttFileDataIterNext(pWriter->pSIter, NULL); + TSDB_CHECK_CODE(code, lino, _exit); - code = tBlockDataAppendRow(pBData, &row, NULL, pWriter->id.uid); - if (code) goto _err; + // add to tree + tRBTreePut(&pWriter->rbt, &pWriter->pSIter->rbtn); - if (pBData->nRow >= pWriter->maxRow) { - code = tsdbWriteDataBlock(pWriter->dWriter.pWriter, pBData, &pWriter->dWriter.mDataBlk, pWriter->cmprAlg); - if (code) goto _err; + // add to list + pWriter->pSIter->next = pWriter->iterList; + pWriter->iterList = pWriter->pSIter; } } - code = tsdbWriteDataBlock(pWriter->dWriter.pWriter, pBData, &pWriter->dWriter.mDataBlk, pWriter->cmprAlg); - if (code) goto _err; + pWriter->pSIter = NULL; + } + + // open writer + SDiskID diskId; + if (pSet) { + diskId = pSet->diskId; + } else { + tfsAllocDisk(pTsdb->pVnode->pTfs, 0 /*TODO*/, &diskId); + tfsMkdirRecurAt(pTsdb->pVnode->pTfs, pTsdb->path, diskId); + } + SDFileSet wSet = {.diskId = diskId, + .fid = fid, + .pHeadF = &(SHeadFile){.commitID = pWriter->commitID}, + .pDataF = (pSet) ? pSet->pDataF : &(SDataFile){.commitID = pWriter->commitID}, + .pSmaF = (pSet) ? pSet->pSmaF : &(SSmaFile){.commitID = pWriter->commitID}, + .nSttF = 1, + .aSttF = {&(SSttFile){.commitID = pWriter->commitID}}}; + code = tsdbDataFWriterOpen(&pWriter->pDataFWriter, pTsdb, &wSet); + TSDB_CHECK_CODE(code, lino, _exit); - for (; pWriter->dReader.iDataBlk < pWriter->dReader.mDataBlk.nItem; pWriter->dReader.iDataBlk++) { - SDataBlk dataBlk; - tMapDataGetItemByIdx(&pWriter->dReader.mDataBlk, pWriter->dReader.iDataBlk, &dataBlk, tGetDataBlk); + if (pWriter->aBlockIdx) { + taosArrayClear(pWriter->aBlockIdx); + } else if ((pWriter->aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx))) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } - code = tMapDataPutItem(&pWriter->dWriter.mDataBlk, &dataBlk, tPutDataBlk); - if (code) goto _err; - } + tMapDataReset(&pWriter->mDataBlk); - code = tsdbSnapNextTableData(pWriter); - if (code) goto _err; + if (pWriter->aSttBlk) { + taosArrayClear(pWriter->aSttBlk); + } else if ((pWriter->aSttBlk = taosArrayInit(0, sizeof(SSttBlk))) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); } - if (pWriter->dWriter.mDataBlk.nItem) { - SBlockIdx blockIdx = {.suid = pWriter->id.suid, .uid = pWriter->id.uid}; - code = tsdbWriteDataBlk(pWriter->dWriter.pWriter, &pWriter->dWriter.mDataBlk, &blockIdx); + tBlockDataReset(&pWriter->bData); + tBlockDataReset(&pWriter->sData); - if (taosArrayPush(pWriter->dWriter.aBlockIdx, &blockIdx) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s, fid:%d", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code), + fid); + } else { + tsdbDebug("vgId:%d %s done, fid:%d", TD_VID(pTsdb->pVnode), __func__, fid); + } + return code; +} + +static int32_t tsdbSnapWriteTableData(STsdbSnapWriter* pWriter, SRowInfo* pRowInfo) { + int32_t code = 0; + int32_t lino = 0; + + // switch to new table if need + if (pRowInfo == NULL || pRowInfo->uid != pWriter->tbid.uid) { + if (pWriter->tbid.uid) { + code = tsdbSnapWriteTableDataEnd(pWriter); + TSDB_CHECK_CODE(code, lino, _exit); } + + code = tsdbSnapWriteTableDataStart(pWriter, (TABLEID*)pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); } - pWriter->id.suid = 0; - pWriter->id.uid = 0; + if (pRowInfo == NULL) goto _exit; - return code; + code = tsdbSnapWriteTableRow(pWriter, &pRowInfo->row); + TSDB_CHECK_CODE(code, lino, _exit); -_err: +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } return code; } -static int32_t tsdbSnapWriteOpenFile(STsdbSnapWriter* pWriter, int32_t fid) { +static int32_t tsdbSnapWriteNextRow(STsdbSnapWriter* pWriter, SRowInfo** ppRowInfo) { int32_t code = 0; - STsdb* pTsdb = pWriter->pTsdb; - - ASSERT(pWriter->dWriter.pWriter == NULL); + int32_t lino = 0; - pWriter->fid = fid; - pWriter->id = (TABLEID){0}; - SDFileSet* pSet = taosArraySearch(pWriter->fs.aDFileSet, &(SDFileSet){.fid = fid}, tDFileSetCmprFn, TD_EQ); + if (pWriter->pSIter) { + code = tsdbDataIterNext2(pWriter->pSIter, NULL); + TSDB_CHECK_CODE(code, lino, _exit); - // Reader - if (pSet) { - code = tsdbDataFReaderOpen(&pWriter->dReader.pReader, pWriter->pTsdb, pSet); - if (code) goto _err; + if (pWriter->pSIter->rowInfo.suid == 0 && pWriter->pSIter->rowInfo.uid == 0) { + pWriter->pSIter = NULL; + } else { + SRBTreeNode* pNode = tRBTreeMin(&pWriter->rbt); + if (pNode) { + int32_t c = tsdbDataIterCmprFn(&pWriter->pSIter->rbtn, pNode); + if (c > 0) { + tRBTreePut(&pWriter->rbt, &pWriter->pSIter->rbtn); + pWriter->pSIter = NULL; + } else if (c == 0) { + ASSERT(0); + } + } + } + } - code = tsdbReadBlockIdx(pWriter->dReader.pReader, pWriter->dReader.aBlockIdx); - if (code) goto _err; - } else { - ASSERT(pWriter->dReader.pReader == NULL); - taosArrayClear(pWriter->dReader.aBlockIdx); - } - pWriter->dReader.iBlockIdx = 0; // point to the next one - code = tsdbSnapNextTableData(pWriter); - if (code) goto _err; - - // Writer - SHeadFile fHead = {.commitID = pWriter->commitID}; - SDataFile fData = {.commitID = pWriter->commitID}; - SSmaFile fSma = {.commitID = pWriter->commitID}; - SSttFile fStt = {.commitID = pWriter->commitID}; - SDFileSet wSet = {.fid = pWriter->fid, .pHeadF = &fHead, .pDataF = &fData, .pSmaF = &fSma}; - if (pSet) { - wSet.diskId = pSet->diskId; - fData = *pSet->pDataF; - fSma = *pSet->pSmaF; - for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) { - wSet.aSttF[iStt] = pSet->aSttF[iStt]; + if (pWriter->pSIter == NULL) { + SRBTreeNode* pNode = tRBTreeMin(&pWriter->rbt); + if (pNode) { + tRBTreeDrop(&pWriter->rbt, pNode); + pWriter->pSIter = TSDB_RBTN_TO_DATA_ITER(pNode); } - wSet.nSttF = pSet->nSttF + 1; // TODO: fix pSet->nSttF == pTsdb->maxFile - } else { - SDiskID did = {0}; - tfsAllocDisk(pTsdb->pVnode->pTfs, 0, &did); - tfsMkdirRecurAt(pTsdb->pVnode->pTfs, pTsdb->path, did); - wSet.diskId = did; - wSet.nSttF = 1; - } - wSet.aSttF[wSet.nSttF - 1] = &fStt; - - code = tsdbDataFWriterOpen(&pWriter->dWriter.pWriter, pWriter->pTsdb, &wSet); - if (code) goto _err; - taosArrayClear(pWriter->dWriter.aBlockIdx); - tMapDataReset(&pWriter->dWriter.mDataBlk); - taosArrayClear(pWriter->dWriter.aSttBlk); - tBlockDataReset(&pWriter->dWriter.bData); - tBlockDataReset(&pWriter->dWriter.sData); + } - return code; + if (ppRowInfo) { + if (pWriter->pSIter) { + *ppRowInfo = &pWriter->pSIter->rowInfo; + } else { + *ppRowInfo = NULL; + } + } -_err: +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } return code; } -static int32_t tsdbSnapWriteCloseFile(STsdbSnapWriter* pWriter) { +static int32_t tsdbSnapWriteGetRow(STsdbSnapWriter* pWriter, SRowInfo** ppRowInfo) { int32_t code = 0; + int32_t lino = 0; - ASSERT(pWriter->dWriter.pWriter); - - code = tsdbSnapWriteTableDataEnd(pWriter); - if (code) goto _err; - - // copy remain table data - TABLEID id = {.suid = INT64_MAX, .uid = INT64_MAX}; - code = tsdbSnapWriteCopyData(pWriter, &id); - if (code) goto _err; - - code = - tsdbWriteSttBlock(pWriter->dWriter.pWriter, &pWriter->dWriter.sData, pWriter->dWriter.aSttBlk, pWriter->cmprAlg); - if (code) goto _err; - - // Indices - code = tsdbWriteBlockIdx(pWriter->dWriter.pWriter, pWriter->dWriter.aBlockIdx); - if (code) goto _err; - - code = tsdbWriteSttBlk(pWriter->dWriter.pWriter, pWriter->dWriter.aSttBlk); - if (code) goto _err; - - code = tsdbUpdateDFileSetHeader(pWriter->dWriter.pWriter); - if (code) goto _err; - - code = tsdbFSUpsertFSet(&pWriter->fs, &pWriter->dWriter.pWriter->wSet); - if (code) goto _err; - - code = tsdbDataFWriterClose(&pWriter->dWriter.pWriter, 1); - if (code) goto _err; - - if (pWriter->dReader.pReader) { - code = tsdbDataFReaderClose(&pWriter->dReader.pReader); - if (code) goto _err; + if (pWriter->pSIter) { + *ppRowInfo = &pWriter->pSIter->rowInfo; + goto _exit; } -_exit: - return code; + code = tsdbSnapWriteNextRow(pWriter, ppRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); -_err: +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } return code; } -static int32_t tsdbSnapWriteToDataFile(STsdbSnapWriter* pWriter, int32_t iRow, int8_t* done) { +static int32_t tsdbSnapWriteFileDataEnd(STsdbSnapWriter* pWriter) { int32_t code = 0; + int32_t lino = 0; - SBlockData* pBData = &pWriter->bData; - TABLEID id = {.suid = pBData->suid, .uid = pBData->uid ? pBData->uid : pBData->aUid[iRow]}; - TSDBROW row = tsdbRowFromBlockData(pBData, iRow); - TSDBKEY key = TSDBROW_KEY(&row); + ASSERT(pWriter->pDataFWriter); - *done = 0; - while (pWriter->dReader.iRow < pWriter->dReader.bData.nRow || - pWriter->dReader.iDataBlk < pWriter->dReader.mDataBlk.nItem) { - // Merge row by row - for (; pWriter->dReader.iRow < pWriter->dReader.bData.nRow; pWriter->dReader.iRow++) { - TSDBROW trow = tsdbRowFromBlockData(&pWriter->dReader.bData, pWriter->dReader.iRow); - TSDBKEY tKey = TSDBROW_KEY(&trow); + // consume remain data and end with a NULL table row + SRowInfo* pRowInfo; + code = tsdbSnapWriteGetRow(pWriter, &pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); + for (;;) { + code = tsdbSnapWriteTableData(pWriter, pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); - ASSERT(pWriter->dReader.bData.suid == id.suid && pWriter->dReader.bData.uid == id.uid); + if (pRowInfo == NULL) break; - int32_t c = tsdbKeyCmprFn(&key, &tKey); - if (c < 0) { - code = tBlockDataAppendRow(&pWriter->dWriter.bData, &row, NULL, id.uid); - if (code) goto _err; - } else if (c > 0) { - code = tBlockDataAppendRow(&pWriter->dWriter.bData, &trow, NULL, id.uid); - if (code) goto _err; - } else { - ASSERT(0); - } + code = tsdbSnapWriteNextRow(pWriter, &pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); + } - if (pWriter->dWriter.bData.nRow >= pWriter->maxRow) { - code = tsdbWriteDataBlock(pWriter->dWriter.pWriter, &pWriter->dWriter.bData, &pWriter->dWriter.mDataBlk, - pWriter->cmprAlg); - if (code) goto _err; - } + // do file-level updates + code = tsdbWriteSttBlk(pWriter->pDataFWriter, pWriter->aSttBlk); + TSDB_CHECK_CODE(code, lino, _exit); - if (c < 0) { - *done = 1; - goto _exit; - } - } + code = tsdbWriteBlockIdx(pWriter->pDataFWriter, pWriter->aBlockIdx); + TSDB_CHECK_CODE(code, lino, _exit); - // Merge row by block - SDataBlk tDataBlk = {.minKey = key, .maxKey = key}; - for (; pWriter->dReader.iDataBlk < pWriter->dReader.mDataBlk.nItem; pWriter->dReader.iDataBlk++) { - SDataBlk dataBlk; - tMapDataGetItemByIdx(&pWriter->dReader.mDataBlk, pWriter->dReader.iDataBlk, &dataBlk, tGetDataBlk); + code = tsdbUpdateDFileSetHeader(pWriter->pDataFWriter); + TSDB_CHECK_CODE(code, lino, _exit); - int32_t c = tDataBlkCmprFn(&dataBlk, &tDataBlk); - if (c < 0) { - code = tsdbWriteDataBlock(pWriter->dWriter.pWriter, &pWriter->dWriter.bData, &pWriter->dWriter.mDataBlk, - pWriter->cmprAlg); - if (code) goto _err; - - code = tMapDataPutItem(&pWriter->dWriter.mDataBlk, &dataBlk, tPutDataBlk); - if (code) goto _err; - } else if (c > 0) { - code = tBlockDataAppendRow(&pWriter->dWriter.bData, &row, NULL, id.uid); - if (code) goto _err; - - if (pWriter->dWriter.bData.nRow >= pWriter->maxRow) { - code = tsdbWriteDataBlock(pWriter->dWriter.pWriter, &pWriter->dWriter.bData, &pWriter->dWriter.mDataBlk, - pWriter->cmprAlg); - if (code) goto _err; - } + code = tsdbFSUpsertFSet(&pWriter->fs, &pWriter->pDataFWriter->wSet); + TSDB_CHECK_CODE(code, lino, _exit); - *done = 1; - goto _exit; - } else { - code = tsdbReadDataBlockEx(pWriter->dReader.pReader, &dataBlk, &pWriter->dReader.bData); - if (code) goto _err; - pWriter->dReader.iRow = 0; + code = tsdbDataFWriterClose(&pWriter->pDataFWriter, 1); + TSDB_CHECK_CODE(code, lino, _exit); - pWriter->dReader.iDataBlk++; - break; - } - } + if (pWriter->pDataFReader) { + code = tsdbDataFReaderClose(&pWriter->pDataFReader); + TSDB_CHECK_CODE(code, lino, _exit); } -_exit: - return code; + // clear sources + while (pWriter->iterList) { + STsdbDataIter2* pIter = pWriter->iterList; + pWriter->iterList = pIter->next; + tsdbCloseDataIter2(pIter); + } -_err: - tsdbError("vgId:%d, %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code)); +_exit: + if (code) { + tsdbError("vgId:%d %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code)); + } else { + tsdbDebug("vgId:%d %s is done", TD_VID(pWriter->pTsdb->pVnode), __func__); + } return code; } -static int32_t tsdbSnapWriteToSttFile(STsdbSnapWriter* pWriter, int32_t iRow) { +static int32_t tsdbSnapWriteTimeSeriesData(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr) { int32_t code = 0; + int32_t lino = 0; - TABLEID id = {.suid = pWriter->bData.suid, - .uid = pWriter->bData.uid ? pWriter->bData.uid : pWriter->bData.aUid[iRow]}; - TSDBROW row = tsdbRowFromBlockData(&pWriter->bData, iRow); - SBlockData* pBData = &pWriter->dWriter.sData; + code = tDecmprBlockData(pHdr->data, pHdr->size, &pWriter->inData, pWriter->aBuf); + TSDB_CHECK_CODE(code, lino, _exit); - if (pBData->suid || pBData->uid) { - if (!TABLE_SAME_SCHEMA(pBData->suid, pBData->uid, id.suid, id.uid)) { - code = tsdbWriteSttBlock(pWriter->dWriter.pWriter, pBData, pWriter->dWriter.aSttBlk, pWriter->cmprAlg); - if (code) goto _err; + ASSERT(pWriter->inData.nRow > 0); - pBData->suid = 0; - pBData->uid = 0; + // switch to new data file if need + int32_t fid = tsdbKeyFid(pWriter->inData.aTSKEY[0], pWriter->minutes, pWriter->precision); + if (pWriter->fid != fid) { + if (pWriter->pDataFWriter) { + code = tsdbSnapWriteFileDataEnd(pWriter); + TSDB_CHECK_CODE(code, lino, _exit); } - } - if (pBData->suid == 0 && pBData->uid == 0) { - code = tsdbUpdateTableSchema(pWriter->pTsdb->pVnode->pMeta, pWriter->id.suid, pWriter->id.uid, &pWriter->skmTable); - if (code) goto _err; - - TABLEID tid = {.suid = pWriter->id.suid, .uid = pWriter->id.suid ? 0 : pWriter->id.uid}; - code = tBlockDataInit(pBData, &tid, pWriter->skmTable.pTSchema, NULL, 0); - if (code) goto _err; + code = tsdbSnapWriteFileDataStart(pWriter, fid); + TSDB_CHECK_CODE(code, lino, _exit); } - code = tBlockDataAppendRow(pBData, &row, NULL, id.uid); - if (code) goto _err; + // loop write each row + SRowInfo* pRowInfo; + code = tsdbSnapWriteGetRow(pWriter, &pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); + for (int32_t iRow = 0; iRow < pWriter->inData.nRow; ++iRow) { + SRowInfo rInfo = {.suid = pWriter->inData.suid, + .uid = pWriter->inData.uid ? pWriter->inData.uid : pWriter->inData.aUid[iRow], + .row = tsdbRowFromBlockData(&pWriter->inData, iRow)}; - if (pBData->nRow >= pWriter->maxRow) { - code = tsdbWriteSttBlock(pWriter->dWriter.pWriter, pBData, pWriter->dWriter.aSttBlk, pWriter->cmprAlg); - if (code) goto _err; + for (;;) { + if (pRowInfo == NULL) { + code = tsdbSnapWriteTableData(pWriter, &rInfo); + TSDB_CHECK_CODE(code, lino, _exit); + break; + } else { + int32_t c = tRowInfoCmprFn(&rInfo, pRowInfo); + if (c < 0) { + code = tsdbSnapWriteTableData(pWriter, &rInfo); + TSDB_CHECK_CODE(code, lino, _exit); + break; + } else if (c > 0) { + code = tsdbSnapWriteTableData(pWriter, pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbSnapWriteNextRow(pWriter, &pRowInfo); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + ASSERT(0); + } + } + } } _exit: - return code; - -_err: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbDebug("vgId:%d %s done, suid:%" PRId64 " uid:%" PRId64 " nRow:%d", TD_VID(pWriter->pTsdb->pVnode), __func__, + pWriter->inData.suid, pWriter->inData.uid, pWriter->inData.nRow); + } return code; } -static int32_t tsdbSnapWriteRowData(STsdbSnapWriter* pWriter, int32_t iRow) { +// SNAP_DATA_DEL +static int32_t tsdbSnapWriteDelTableDataStart(STsdbSnapWriter* pWriter, TABLEID* pId) { int32_t code = 0; + int32_t lino = 0; - SBlockData* pBlockData = &pWriter->bData; - TABLEID id = {.suid = pBlockData->suid, .uid = pBlockData->uid ? pBlockData->uid : pBlockData->aUid[iRow]}; - - // End last table data write if need - if (tTABLEIDCmprFn(&pWriter->id, &id) != 0) { - code = tsdbSnapWriteTableDataEnd(pWriter); - if (code) goto _err; - } - - // Start new table data write if need - if (pWriter->id.suid == 0 && pWriter->id.uid == 0) { - code = tsdbSnapWriteTableDataStart(pWriter, &id); - if (code) goto _err; - } - - // Merge with .data file data - int8_t done = 0; - if (pWriter->dReader.pBlockIdx && tTABLEIDCmprFn(pWriter->dReader.pBlockIdx, &id) == 0) { - code = tsdbSnapWriteToDataFile(pWriter, iRow, &done); - if (code) goto _err; - } - - // Append to the .stt data block (todo: check if need to set/reload sst block) - if (!done) { - code = tsdbSnapWriteToSttFile(pWriter, iRow); - if (code) goto _err; + if (pId) { + pWriter->tbid = *pId; + } else { + pWriter->tbid = (TABLEID){.suid = INT64_MAX, .uid = INT64_MAX}; } -_exit: - return code; + taosArrayClear(pWriter->aDelData); -_err: - tsdbError("vgId:%d, %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code)); - return code; -} + if (pWriter->pTIter) { + while (pWriter->pTIter->tIter.iDelIdx < taosArrayGetSize(pWriter->pTIter->tIter.aDelIdx)) { + SDelIdx* pDelIdx = taosArrayGet(pWriter->pTIter->tIter.aDelIdx, pWriter->pTIter->tIter.iDelIdx); -static int32_t tsdbSnapWriteData(STsdbSnapWriter* pWriter, uint8_t* pData, uint32_t nData) { - int32_t code = 0; - STsdb* pTsdb = pWriter->pTsdb; - SBlockData* pBlockData = &pWriter->bData; + int32_t c = tTABLEIDCmprFn(pDelIdx, &pWriter->tbid); + if (c < 0) { + code = tsdbReadDelData(pWriter->pDelFReader, pDelIdx, pWriter->pTIter->tIter.aDelData); + TSDB_CHECK_CODE(code, lino, _exit); - // Decode data - SSnapDataHdr* pHdr = (SSnapDataHdr*)pData; - code = tDecmprBlockData(pHdr->data, pHdr->size, pBlockData, pWriter->aBuf); - if (code) goto _err; + SDelIdx* pDelIdxNew = taosArrayReserve(pWriter->aDelIdx, 1); + if (pDelIdxNew == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } - ASSERT(pBlockData->nRow > 0); + pDelIdxNew->suid = pDelIdx->suid; + pDelIdxNew->uid = pDelIdx->uid; - // Loop to handle each row - for (int32_t iRow = 0; iRow < pBlockData->nRow; iRow++) { - TSKEY ts = pBlockData->aTSKEY[iRow]; - int32_t fid = tsdbKeyFid(ts, pWriter->minutes, pWriter->precision); + code = tsdbWriteDelData(pWriter->pDelFWriter, pWriter->pTIter->tIter.aDelData, pDelIdxNew); + TSDB_CHECK_CODE(code, lino, _exit); - if (pWriter->dWriter.pWriter == NULL || pWriter->fid != fid) { - if (pWriter->dWriter.pWriter) { - // ASSERT(fid > pWriter->fid); + pWriter->pTIter->tIter.iDelIdx++; + } else if (c == 0) { + code = tsdbReadDelData(pWriter->pDelFReader, pDelIdx, pWriter->aDelData); + TSDB_CHECK_CODE(code, lino, _exit); - code = tsdbSnapWriteCloseFile(pWriter); - if (code) goto _err; + pWriter->pTIter->tIter.iDelIdx++; + break; + } else { + break; } - - code = tsdbSnapWriteOpenFile(pWriter, fid); - if (code) goto _err; } - - code = tsdbSnapWriteRowData(pWriter, iRow); - if (code) goto _err; } - return code; - -_err: - tsdbError("vgId:%d, vnode snapshot tsdb write data for %s failed since %s", TD_VID(pTsdb->pVnode), pTsdb->path, - tstrerror(code)); +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbTrace("vgId:%d %s done, suid:%" PRId64 " uid:%" PRId64, TD_VID(pWriter->pTsdb->pVnode), __func__, pId->suid, + pId->uid); + } return code; } -// SNAP_DATA_DEL -static int32_t tsdbSnapMoveWriteDelData(STsdbSnapWriter* pWriter, TABLEID* pId) { +static int32_t tsdbSnapWriteDelTableDataEnd(STsdbSnapWriter* pWriter) { int32_t code = 0; + int32_t lino = 0; - while (true) { - if (pWriter->iDelIdx >= taosArrayGetSize(pWriter->aDelIdxR)) break; - - SDelIdx* pDelIdx = (SDelIdx*)taosArrayGet(pWriter->aDelIdxR, pWriter->iDelIdx); - - if (tTABLEIDCmprFn(pDelIdx, pId) >= 0) break; - - code = tsdbReadDelData(pWriter->pDelFReader, pDelIdx, pWriter->aDelData); - if (code) goto _exit; - - SDelIdx delIdx = *pDelIdx; - code = tsdbWriteDelData(pWriter->pDelFWriter, pWriter->aDelData, &delIdx); - if (code) goto _exit; - - if (taosArrayPush(pWriter->aDelIdxW, &delIdx) == NULL) { + if (taosArrayGetSize(pWriter->aDelData) > 0) { + SDelIdx* pDelIdx = taosArrayReserve(pWriter->aDelIdx, 1); + if (pDelIdx == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; - goto _exit; + TSDB_CHECK_CODE(code, lino, _exit); } - pWriter->iDelIdx++; + pDelIdx->suid = pWriter->tbid.suid; + pDelIdx->uid = pWriter->tbid.uid; + + code = tsdbWriteDelData(pWriter->pDelFWriter, pWriter->aDelData, pDelIdx); + TSDB_CHECK_CODE(code, lino, _exit); } _exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbTrace("vgId:%d %s done", TD_VID(pWriter->pTsdb->pVnode), __func__); + } return code; } -static int32_t tsdbSnapWriteDel(STsdbSnapWriter* pWriter, uint8_t* pData, uint32_t nData) { +static int32_t tsdbSnapWriteDelTableData(STsdbSnapWriter* pWriter, TABLEID* pId, uint8_t* pData, int64_t size) { int32_t code = 0; - STsdb* pTsdb = pWriter->pTsdb; - - // Open del file if not opened yet - if (pWriter->pDelFWriter == NULL) { - SDelFile* pDelFile = pWriter->fs.pDelFile; - - // reader - if (pDelFile) { - code = tsdbDelFReaderOpen(&pWriter->pDelFReader, pDelFile, pTsdb); - if (code) goto _err; + int32_t lino = 0; - code = tsdbReadDelIdx(pWriter->pDelFReader, pWriter->aDelIdxR); - if (code) goto _err; - } else { - taosArrayClear(pWriter->aDelIdxR); + if (pId == NULL || pId->uid != pWriter->tbid.uid) { + if (pWriter->tbid.uid) { + code = tsdbSnapWriteDelTableDataEnd(pWriter); + TSDB_CHECK_CODE(code, lino, _exit); } - pWriter->iDelIdx = 0; - // writer - SDelFile delFile = {.commitID = pWriter->commitID}; - code = tsdbDelFWriterOpen(&pWriter->pDelFWriter, &delFile, pTsdb); - if (code) goto _err; - taosArrayClear(pWriter->aDelIdxW); + code = tsdbSnapWriteDelTableDataStart(pWriter, pId); + TSDB_CHECK_CODE(code, lino, _exit); } - SSnapDataHdr* pHdr = (SSnapDataHdr*)pData; - TABLEID id = *(TABLEID*)pHdr->data; + if (pId == NULL) goto _exit; - ASSERT(pHdr->size + sizeof(SSnapDataHdr) == nData); + int64_t n = 0; + while (n < size) { + SDelData delData; + n += tGetDelData(pData + n, &delData); - // Move write data < id - code = tsdbSnapMoveWriteDelData(pWriter, &id); - if (code) goto _err; + if (taosArrayPush(pWriter->aDelData, &delData) < 0) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + } + ASSERT(n == size); - // Merge incoming data with current - if (pWriter->iDelIdx < taosArrayGetSize(pWriter->aDelIdxR) && - tTABLEIDCmprFn(taosArrayGet(pWriter->aDelIdxR, pWriter->iDelIdx), &id) == 0) { - SDelIdx* pDelIdx = (SDelIdx*)taosArrayGet(pWriter->aDelIdxR, pWriter->iDelIdx); +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } + return code; +} - code = tsdbReadDelData(pWriter->pDelFReader, pDelIdx, pWriter->aDelData); - if (code) goto _err; +static int32_t tsdbSnapWriteDelDataStart(STsdbSnapWriter* pWriter) { + int32_t code = 0; + int32_t lino = 0; - pWriter->iDelIdx++; - } else { - taosArrayClear(pWriter->aDelData); - } + STsdb* pTsdb = pWriter->pTsdb; + SDelFile* pDelFile = pWriter->fs.pDelFile; - int64_t n = sizeof(SSnapDataHdr) + sizeof(TABLEID); - while (n < nData) { - SDelData delData; + pWriter->tbid = (TABLEID){0}; - n += tGetDelData(pData + n, &delData); + // reader + if (pDelFile) { + code = tsdbDelFReaderOpen(&pWriter->pDelFReader, pDelFile, pTsdb); + TSDB_CHECK_CODE(code, lino, _exit); - if (taosArrayPush(pWriter->aDelData, &delData) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } + code = tsdbOpenTombFileDataIter(pWriter->pDelFReader, &pWriter->pTIter); + TSDB_CHECK_CODE(code, lino, _exit); } - SDelIdx delIdx = {.suid = id.suid, .uid = id.uid}; - code = tsdbWriteDelData(pWriter->pDelFWriter, pWriter->aDelData, &delIdx); - if (code) goto _err; + // writer + code = tsdbDelFWriterOpen(&pWriter->pDelFWriter, &(SDelFile){.commitID = pWriter->commitID}, pTsdb); + TSDB_CHECK_CODE(code, lino, _exit); - if (taosArrayPush(pWriter->aDelIdxW, &delIdx) == NULL) { + if ((pWriter->aDelIdx = taosArrayInit(0, sizeof(SDelIdx))) == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; + TSDB_CHECK_CODE(code, lino, _exit); + } + if ((pWriter->aDelData = taosArrayInit(0, sizeof(SDelData))) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); } - return code; - -_err: - tsdbError("vgId:%d, vnode snapshot tsdb write del for %s failed since %s", TD_VID(pTsdb->pVnode), pTsdb->path, - tstrerror(code)); +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbDebug("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); + } return code; } -static int32_t tsdbSnapWriteDelEnd(STsdbSnapWriter* pWriter) { +static int32_t tsdbSnapWriteDelDataEnd(STsdbSnapWriter* pWriter) { int32_t code = 0; - STsdb* pTsdb = pWriter->pTsdb; + int32_t lino = 0; - if (pWriter->pDelFWriter == NULL) return code; + STsdb* pTsdb = pWriter->pTsdb; - TABLEID id = {.suid = INT64_MAX, .uid = INT64_MAX}; - code = tsdbSnapMoveWriteDelData(pWriter, &id); - if (code) goto _err; + // end remaining table with NULL data + code = tsdbSnapWriteDelTableData(pWriter, NULL, NULL, 0); + TSDB_CHECK_CODE(code, lino, _exit); - code = tsdbWriteDelIdx(pWriter->pDelFWriter, pWriter->aDelIdxW); - if (code) goto _err; + // update file-level info + code = tsdbWriteDelIdx(pWriter->pDelFWriter, pWriter->aDelIdx); + TSDB_CHECK_CODE(code, lino, _exit); code = tsdbUpdateDelFileHdr(pWriter->pDelFWriter); - if (code) goto _err; + TSDB_CHECK_CODE(code, lino, _exit); code = tsdbFSUpsertDelFile(&pWriter->fs, &pWriter->pDelFWriter->fDel); - if (code) goto _err; + TSDB_CHECK_CODE(code, lino, _exit); code = tsdbDelFWriterClose(&pWriter->pDelFWriter, 1); - if (code) goto _err; + TSDB_CHECK_CODE(code, lino, _exit); if (pWriter->pDelFReader) { code = tsdbDelFReaderClose(&pWriter->pDelFReader); - if (code) goto _err; + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (pWriter->pTIter) { + tsdbCloseDataIter2(pWriter->pTIter); + pWriter->pTIter = NULL; } - tsdbInfo("vgId:%d, vnode snapshot tsdb write del for %s end", TD_VID(pTsdb->pVnode), pTsdb->path); +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbInfo("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); + } return code; +} + +static int32_t tsdbSnapWriteDelData(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr) { + int32_t code = 0; + int32_t lino = 0; + + STsdb* pTsdb = pWriter->pTsdb; -_err: - tsdbError("vgId:%d, vnode snapshot tsdb write del end for %s failed since %s", TD_VID(pTsdb->pVnode), pTsdb->path, - tstrerror(code)); + // start to write del data if need + if (pWriter->pDelFWriter == NULL) { + code = tsdbSnapWriteDelDataStart(pWriter); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // do write del data + code = tsdbSnapWriteDelTableData(pWriter, (TABLEID*)pHdr->data, pHdr->data + sizeof(TABLEID), + pHdr->size - sizeof(TABLEID)); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + tsdbError("vgId:%d %s failed since %s", TD_VID(pTsdb->pVnode), __func__, tstrerror(code)); + } else { + tsdbTrace("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); + } return code; } // APIs int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWriter** ppWriter) { - int32_t code = 0; - int32_t lino = 0; - STsdbSnapWriter* pWriter = NULL; + int32_t code = 0; + int32_t lino = 0; // alloc - pWriter = (STsdbSnapWriter*)taosMemoryCalloc(1, sizeof(*pWriter)); + STsdbSnapWriter* pWriter = (STsdbSnapWriter*)taosMemoryCalloc(1, sizeof(*pWriter)); if (pWriter == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; TSDB_CHECK_CODE(code, lino, _exit); @@ -1288,11 +1835,6 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr pWriter->pTsdb = pTsdb; pWriter->sver = sver; pWriter->ever = ever; - - code = tsdbFSCopy(pTsdb, &pWriter->fs); - TSDB_CHECK_CODE(code, lino, _exit); - - // config pWriter->minutes = pTsdb->keepCfg.days; pWriter->precision = pTsdb->keepCfg.precision; pWriter->minRow = pTsdb->pVnode->config.tsdbCfg.minRows; @@ -1300,102 +1842,70 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr pWriter->cmprAlg = pTsdb->pVnode->config.tsdbCfg.compression; pWriter->commitID = pTsdb->pVnode->state.commitID; + code = tsdbFSCopy(pTsdb, &pWriter->fs); + TSDB_CHECK_CODE(code, lino, _exit); + // SNAP_DATA_TSDB - code = tBlockDataCreate(&pWriter->bData); + code = tBlockDataCreate(&pWriter->inData); TSDB_CHECK_CODE(code, lino, _exit); pWriter->fid = INT32_MIN; - pWriter->id = (TABLEID){0}; - // Reader - pWriter->dReader.aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx)); - if (pWriter->dReader.aBlockIdx == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - code = tBlockDataCreate(&pWriter->dReader.bData); - TSDB_CHECK_CODE(code, lino, _exit); - // Writer - pWriter->dWriter.aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx)); - if (pWriter->dWriter.aBlockIdx == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - pWriter->dWriter.aSttBlk = taosArrayInit(0, sizeof(SSttBlk)); - if (pWriter->dWriter.aSttBlk == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - code = tBlockDataCreate(&pWriter->dWriter.bData); + code = tBlockDataCreate(&pWriter->bData); TSDB_CHECK_CODE(code, lino, _exit); - code = tBlockDataCreate(&pWriter->dWriter.sData); + + code = tBlockDataCreate(&pWriter->sData); TSDB_CHECK_CODE(code, lino, _exit); // SNAP_DATA_DEL - pWriter->aDelIdxR = taosArrayInit(0, sizeof(SDelIdx)); - if (pWriter->aDelIdxR == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - pWriter->aDelData = taosArrayInit(0, sizeof(SDelData)); - if (pWriter->aDelData == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - pWriter->aDelIdxW = taosArrayInit(0, sizeof(SDelIdx)); - if (pWriter->aDelIdxW == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } _exit: if (code) { - tsdbError("vgId:%d, %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); - *ppWriter = NULL; - + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); if (pWriter) { - if (pWriter->aDelIdxW) taosArrayDestroy(pWriter->aDelIdxW); - if (pWriter->aDelData) taosArrayDestroy(pWriter->aDelData); - if (pWriter->aDelIdxR) taosArrayDestroy(pWriter->aDelIdxR); - tBlockDataDestroy(&pWriter->dWriter.sData); - tBlockDataDestroy(&pWriter->dWriter.bData); - if (pWriter->dWriter.aSttBlk) taosArrayDestroy(pWriter->dWriter.aSttBlk); - if (pWriter->dWriter.aBlockIdx) taosArrayDestroy(pWriter->dWriter.aBlockIdx); - tBlockDataDestroy(&pWriter->dReader.bData); - if (pWriter->dReader.aBlockIdx) taosArrayDestroy(pWriter->dReader.aBlockIdx); + tBlockDataDestroy(&pWriter->sData); tBlockDataDestroy(&pWriter->bData); + tBlockDataDestroy(&pWriter->inData); tsdbFSDestroy(&pWriter->fs); - taosMemoryFree(pWriter); + pWriter = NULL; } } else { - tsdbInfo("vgId:%d, %s done", TD_VID(pTsdb->pVnode), __func__); - *ppWriter = pWriter; + tsdbInfo("vgId:%d %s done, sver:%" PRId64 " ever:%" PRId64, TD_VID(pTsdb->pVnode), __func__, sver, ever); } + *ppWriter = pWriter; return code; } int32_t tsdbSnapWriterPrepareClose(STsdbSnapWriter* pWriter) { int32_t code = 0; - if (pWriter->dWriter.pWriter) { - code = tsdbSnapWriteCloseFile(pWriter); - if (code) goto _exit; + int32_t lino = 0; + + if (pWriter->pDataFWriter) { + code = tsdbSnapWriteFileDataEnd(pWriter); + TSDB_CHECK_CODE(code, lino, _exit); } - code = tsdbSnapWriteDelEnd(pWriter); - if (code) goto _exit; + if (pWriter->pDelFWriter) { + code = tsdbSnapWriteDelDataEnd(pWriter); + TSDB_CHECK_CODE(code, lino, _exit); + } code = tsdbFSPrepareCommit(pWriter->pTsdb, &pWriter->fs); - if (code) goto _exit; + TSDB_CHECK_CODE(code, lino, _exit); _exit: if (code) { - tsdbError("vgId:%d, %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code)); + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbDebug("vgId:%d %s done", TD_VID(pWriter->pTsdb->pVnode), __func__); } return code; } int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) { - int32_t code = 0; + int32_t code = 0; + int32_t lino = 0; + STsdbSnapWriter* pWriter = *ppWriter; STsdb* pTsdb = pWriter->pTsdb; @@ -1408,7 +1918,7 @@ int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) { code = tsdbFSCommit(pWriter->pTsdb); if (code) { taosThreadRwlockUnlock(&pTsdb->rwLock); - goto _err; + TSDB_CHECK_CODE(code, lino, _exit); } // unlock @@ -1416,72 +1926,60 @@ int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) { } // SNAP_DATA_DEL - taosArrayDestroy(pWriter->aDelIdxW); taosArrayDestroy(pWriter->aDelData); - taosArrayDestroy(pWriter->aDelIdxR); + taosArrayDestroy(pWriter->aDelIdx); // SNAP_DATA_TSDB - - // Writer - tBlockDataDestroy(&pWriter->dWriter.sData); - tBlockDataDestroy(&pWriter->dWriter.bData); - taosArrayDestroy(pWriter->dWriter.aSttBlk); - tMapDataClear(&pWriter->dWriter.mDataBlk); - taosArrayDestroy(pWriter->dWriter.aBlockIdx); - - // Reader - tBlockDataDestroy(&pWriter->dReader.bData); - tMapDataClear(&pWriter->dReader.mDataBlk); - taosArrayDestroy(pWriter->dReader.aBlockIdx); - + tBlockDataDestroy(&pWriter->sData); tBlockDataDestroy(&pWriter->bData); + taosArrayDestroy(pWriter->aSttBlk); + tMapDataClear(&pWriter->mDataBlk); + taosArrayDestroy(pWriter->aBlockIdx); tDestroyTSchema(pWriter->skmTable.pTSchema); + tBlockDataDestroy(&pWriter->inData); for (int32_t iBuf = 0; iBuf < sizeof(pWriter->aBuf) / sizeof(uint8_t*); iBuf++) { tFree(pWriter->aBuf[iBuf]); } - tsdbInfo("vgId:%d, %s done", TD_VID(pWriter->pTsdb->pVnode), __func__); + tsdbFSDestroy(&pWriter->fs); taosMemoryFree(pWriter); *ppWriter = NULL; - return code; -_err: - tsdbError("vgId:%d, vnode snapshot tsdb writer close for %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), - pWriter->pTsdb->path, tstrerror(code)); - taosMemoryFree(pWriter); - *ppWriter = NULL; +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbInfo("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); + } return code; } -int32_t tsdbSnapWrite(STsdbSnapWriter* pWriter, uint8_t* pData, uint32_t nData) { - int32_t code = 0; - SSnapDataHdr* pHdr = (SSnapDataHdr*)pData; +int32_t tsdbSnapWrite(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr) { + int32_t code = 0; + int32_t lino = 0; - // ts data if (pHdr->type == SNAP_DATA_TSDB) { - code = tsdbSnapWriteData(pWriter, pData, nData); - if (code) goto _err; - + code = tsdbSnapWriteTimeSeriesData(pWriter, pHdr); + TSDB_CHECK_CODE(code, lino, _exit); goto _exit; - } else { - if (pWriter->dWriter.pWriter) { - code = tsdbSnapWriteCloseFile(pWriter); - if (code) goto _err; - } + } else if (pWriter->pDataFWriter) { + code = tsdbSnapWriteFileDataEnd(pWriter); + TSDB_CHECK_CODE(code, lino, _exit); } - // del data if (pHdr->type == SNAP_DATA_DEL) { - code = tsdbSnapWriteDel(pWriter, pData, nData); - if (code) goto _err; + code = tsdbSnapWriteDelData(pWriter, pHdr); + TSDB_CHECK_CODE(code, lino, _exit); + goto _exit; } _exit: - tsdbDebug("vgId:%d, tsdb snapshot write for %s succeed", TD_VID(pWriter->pTsdb->pVnode), pWriter->pTsdb->path); - return code; - -_err: - tsdbError("vgId:%d, tsdb snapshot write for %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), pWriter->pTsdb->path, - tstrerror(code)); + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s, type:%d index:%" PRId64 " size:%" PRId64, + TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code), pHdr->type, pHdr->index, pHdr->size); + } else { + tsdbDebug("vgId:%d %s done, type:%d index:%" PRId64 " size:%" PRId64, TD_VID(pWriter->pTsdb->pVnode), __func__, + pHdr->type, pHdr->index, pHdr->size); + } return code; } diff --git a/source/dnode/vnode/src/tsdb/tsdbUtil.c b/source/dnode/vnode/src/tsdb/tsdbUtil.c index f4ac4b7a975b9639d865ef7bd1a9631edff7fa37..4e051b8b30430c4c919fb2e417cf8f3346b4e4b0 100644 --- a/source/dnode/vnode/src/tsdb/tsdbUtil.c +++ b/source/dnode/vnode/src/tsdb/tsdbUtil.c @@ -732,6 +732,7 @@ int32_t tsdbRowMergerAdd(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema) tsdbRowGetColVal(pRow, pTSchema, jCol++, pColVal); if (key.version > pMerger->version) { +#if 0 if (!COL_VAL_IS_NONE(pColVal)) { if ((!COL_VAL_IS_NULL(pColVal)) && IS_VAR_DATA_TYPE(pColVal->type)) { SColVal *tColVal = taosArrayGet(pMerger->pArray, iCol); @@ -747,6 +748,28 @@ int32_t tsdbRowMergerAdd(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema) taosArraySet(pMerger->pArray, iCol, pColVal); } } +#endif + if (!COL_VAL_IS_NONE(pColVal)) { + if (IS_VAR_DATA_TYPE(pColVal->type)) { + SColVal *pTColVal = taosArrayGet(pMerger->pArray, iCol); + if (!COL_VAL_IS_NULL(pColVal)) { + code = tRealloc(&pTColVal->value.pData, pColVal->value.nData); + if (code) return code; + + pTColVal->value.nData = pColVal->value.nData; + if (pTColVal->value.nData) { + memcpy(pTColVal->value.pData, pColVal->value.pData, pTColVal->value.nData); + } + pTColVal->flag = 0; + } else { + tFree(pTColVal->value.pData); + pTColVal->value.pData = NULL; + taosArraySet(pMerger->pArray, iCol, pColVal); + } + } else { + taosArraySet(pMerger->pArray, iCol, pColVal); + } + } } else if (key.version < pMerger->version) { SColVal *tColVal = (SColVal *)taosArrayGet(pMerger->pArray, iCol); if (COL_VAL_IS_NONE(tColVal) && !COL_VAL_IS_NONE(pColVal)) { @@ -1110,6 +1133,7 @@ _exit: void tBlockDataReset(SBlockData *pBlockData) { pBlockData->suid = 0; pBlockData->uid = 0; + pBlockData->nRow = 0; } void tBlockDataClear(SBlockData *pBlockData) { diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index e75dc24329c03ade45e12242fd70a62b963f74ff..43f903dc4867178919e3d3b519b899afcca6d835 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -455,7 +455,7 @@ int32_t vnodeSnapWrite(SVSnapWriter *pWriter, uint8_t *pData, uint32_t nData) { if (code) goto _err; } - code = tsdbSnapWrite(pWriter->pTsdbSnapWriter, pData, nData); + code = tsdbSnapWrite(pWriter->pTsdbSnapWriter, pHdr); if (code) goto _err; } break; case SNAP_DATA_TQ_HANDLE: { diff --git a/source/libs/catalog/inc/catalogInt.h b/source/libs/catalog/inc/catalogInt.h index 836ce87fbb7bb0620567be05030527c4ca7ac101..7ee7a24f97146ce91b1294d21a81e8cf163b2993 100644 --- a/source/libs/catalog/inc/catalogInt.h +++ b/source/libs/catalog/inc/catalogInt.h @@ -805,6 +805,7 @@ int32_t ctgMakeVgArray(SDBVgInfo* dbInfo); int32_t ctgAcquireVgMetaFromCache(SCatalog *pCtg, const char *dbFName, const char *tbName, SCtgDBCache **pDb, SCtgTbCache **pTb); int32_t ctgCopyTbMeta(SCatalog *pCtg, SCtgTbMetaCtx *ctx, SCtgDBCache **pDb, SCtgTbCache **pTb, STableMeta **pTableMeta, char* dbFName); void ctgReleaseVgMetaToCache(SCatalog *pCtg, SCtgDBCache *dbCache, SCtgTbCache *pCache); +void ctgReleaseTbMetaToCache(SCatalog *pCtg, SCtgDBCache *dbCache, SCtgTbCache *pCache); extern SCatalogMgmt gCtgMgmt; extern SCtgDebug gCTGDebug; diff --git a/source/libs/catalog/src/catalog.c b/source/libs/catalog/src/catalog.c index c7af0411bea055b5cf6afb13941735ce3c104eaa..f9a218835ea77d4a20aa1b7ac8086187c995c561 100644 --- a/source/libs/catalog/src/catalog.c +++ b/source/libs/catalog/src/catalog.c @@ -598,10 +598,16 @@ int32_t ctgGetCachedTbVgMeta(SCatalog* pCtg, const SName* pTableName, SVgroupInf CTG_ERR_JRET(ctgGetVgInfoFromHashValue(pCtg, dbCache->vgCache.vgInfo, pTableName, pVgroup)); + ctgRUnlockVgInfo(dbCache); + SCtgTbMetaCtx ctx = {0}; ctx.pName = (SName*)pTableName; ctx.flag = CTG_FLAG_UNKNOWN_STB; - CTG_ERR_JRET(ctgCopyTbMeta(pCtg, &ctx, &dbCache, &tbCache, pTableMeta, db)); + code = ctgCopyTbMeta(pCtg, &ctx, &dbCache, &tbCache, pTableMeta, db); + + ctgReleaseTbMetaToCache(pCtg, dbCache, tbCache); + + CTG_RET(code); _return: diff --git a/source/libs/catalog/src/ctgAsync.c b/source/libs/catalog/src/ctgAsync.c index 438128203ef10867f433693940ab4d62e0c65ebd..325d6e0e46c61e398e3766b931fe6141abd710c4 100644 --- a/source/libs/catalog/src/ctgAsync.c +++ b/source/libs/catalog/src/ctgAsync.c @@ -999,6 +999,7 @@ int32_t ctgHandleGetTbMetaRsp(SCtgTaskReq* tReq, int32_t reqType, const SDataBuf CTG_ERR_JRET(ctgGetTbMetaFromVnode(pCtg, pConn, pName, &vgInfo, NULL, tReq)); ctgReleaseVgInfoToCache(pCtg, dbCache); + dbCache = NULL; } else { SBuildUseDBInput input = {0}; @@ -1168,6 +1169,7 @@ int32_t ctgHandleGetTbMetasRsp(SCtgTaskReq* tReq, int32_t reqType, const SDataBu CTG_ERR_JRET(ctgGetTbMetaFromVnode(pCtg, pConn, pName, &vgInfo, NULL, tReq)); ctgReleaseVgInfoToCache(pCtg, dbCache); + dbCache = NULL; } else { SBuildUseDBInput input = {0}; diff --git a/source/libs/catalog/src/ctgCache.c b/source/libs/catalog/src/ctgCache.c index c266cc1df9fc0ab5bd61574f519920df327630c9..6e4077eae05aa8cead28111f13f9600bb4e3244d 100644 --- a/source/libs/catalog/src/ctgCache.c +++ b/source/libs/catalog/src/ctgCache.c @@ -2118,7 +2118,7 @@ int32_t ctgOpUpdateEpset(SCtgCacheOperation *operation) { _return: - if (dbCache) { + if (code == TSDB_CODE_SUCCESS && dbCache) { ctgWUnlockVgInfo(dbCache); } diff --git a/source/libs/command/src/command.c b/source/libs/command/src/command.c index c87f6953eb5fbb2515b2db54ab1e85854111eeb2..6eef1ded695accbcf204e004c4d00af3987d8d91 100644 --- a/source/libs/command/src/command.c +++ b/source/libs/command/src/command.c @@ -281,10 +281,10 @@ static void setCreateDBResultIntoDataBlock(SSDataBlock* pBlock, char* dbFName, S len += sprintf( buf2 + VARSTR_HEADER_SIZE, "CREATE DATABASE `%s` BUFFER %d CACHESIZE %d CACHEMODEL '%s' COMP %d DURATION %dm " - "WAL_FSYNC_PERIOD %d MAXROWS %d MINROWS %d KEEP %dm,%dm,%dm PAGES %d PAGESIZE %d PRECISION '%s' REPLICA %d " + "WAL_FSYNC_PERIOD %d MAXROWS %d MINROWS %d STT_TRIGGER %d KEEP %dm,%dm,%dm PAGES %d PAGESIZE %d PRECISION '%s' REPLICA %d " "WAL_LEVEL %d VGROUPS %d SINGLE_STABLE %d", dbFName, pCfg->buffer, pCfg->cacheSize, cacheModelStr(pCfg->cacheLast), pCfg->compression, pCfg->daysPerFile, - pCfg->walFsyncPeriod, pCfg->maxRows, pCfg->minRows, pCfg->daysToKeep0, pCfg->daysToKeep1, pCfg->daysToKeep2, + pCfg->walFsyncPeriod, pCfg->maxRows, pCfg->minRows, pCfg->sstTrigger, pCfg->daysToKeep0, pCfg->daysToKeep1, pCfg->daysToKeep2, pCfg->pages, pCfg->pageSize, prec, pCfg->replications, pCfg->walLevel, pCfg->numOfVgroups, 1 == pCfg->numOfStables); diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 6ec60ee85722d6342074cbc7e2e87fa41452b943..c599b479e61b4c30a46713e4549983a6fd448c19 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -24,12 +24,16 @@ static TdThreadOnce initPoolOnce = PTHREAD_ONCE_INIT; int32_t exchangeObjRefPool = -1; -static void initRefPool() { exchangeObjRefPool = taosOpenRef(1024, doDestroyExchangeOperatorInfo); } static void cleanupRefPool() { int32_t ref = atomic_val_compare_exchange_32(&exchangeObjRefPool, exchangeObjRefPool, 0); taosCloseRef(ref); } +static void initRefPool() { + exchangeObjRefPool = taosOpenRef(1024, doDestroyExchangeOperatorInfo); + atexit(cleanupRefPool); +} + static int32_t doSetSMABlock(SOperatorInfo* pOperator, void* input, size_t numOfBlocks, int32_t type, char* id) { ASSERT(pOperator != NULL); if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) { @@ -448,7 +452,6 @@ int32_t qCreateExecTask(SReadHandle* readHandle, int32_t vgId, uint64_t taskId, SExecTaskInfo** pTask = (SExecTaskInfo**)pTaskInfo; taosThreadOnce(&initPoolOnce, initRefPool); - atexit(cleanupRefPool); qDebug("start to create subplan task, TID:0x%" PRIx64 " QID:0x%" PRIx64, taskId, pSubplan->id.queryId); diff --git a/source/libs/executor/src/groupoperator.c b/source/libs/executor/src/groupoperator.c index 3bc00b79b1aea8bd8dc6d586ed8598b3b5672e4a..fb122b077fc21477b18c5db83b28d0caa294e605 100644 --- a/source/libs/executor/src/groupoperator.c +++ b/source/libs/executor/src/groupoperator.c @@ -593,8 +593,11 @@ void* getCurrentDataGroupInfo(const SPartitionOperatorInfo* pInfo, SDataGroupInf int32_t pageId = 0; pPage = getNewBufPage(pInfo->pBuf, &pageId); - taosArrayPush(p->pPageList, &pageId); + if (pPage == NULL) { + return pPage; + } + taosArrayPush(p->pPageList, &pageId); *(int32_t*)pPage = 0; } else { int32_t* curId = taosArrayGetLast(p->pPageList); @@ -612,6 +615,11 @@ void* getCurrentDataGroupInfo(const SPartitionOperatorInfo* pInfo, SDataGroupInf // add a new page for current group int32_t pageId = 0; pPage = getNewBufPage(pInfo->pBuf, &pageId); + if (pPage == NULL) { + qError("failed to get new buffer, code:%s", tstrerror(terrno)); + return NULL; + } + taosArrayPush(p->pPageList, &pageId); memset(pPage, 0, getBufPageSize(pInfo->pBuf)); } diff --git a/source/libs/executor/src/sysscanoperator.c b/source/libs/executor/src/sysscanoperator.c index ddcaaf2c72778d1a8488ee353722bef2e337fb42..2b78f265fb4d6f2a1e41350f2f649582f9b84213 100644 --- a/source/libs/executor/src/sysscanoperator.c +++ b/source/libs/executor/src/sysscanoperator.c @@ -66,7 +66,7 @@ typedef struct SSysTableScanInfo { int64_t numOfBlocks; // extract basic running information. SLoadRemoteDataInfo loadInfo; - int32_t tbnameSlotId; + int32_t tbnameSlotId; } SSysTableScanInfo; typedef struct { @@ -81,10 +81,10 @@ typedef struct MergeIndex { } MergeIndex; typedef struct SBlockDistInfo { - SSDataBlock* pResBlock; - STsdbReader* pHandle; - SReadHandle readHandle; - uint64_t uid; // table uid + SSDataBlock* pResBlock; + STsdbReader* pHandle; + SReadHandle readHandle; + uint64_t uid; // table uid } SBlockDistInfo; static int32_t sysChkFilter__Comm(SNode* pNode); @@ -129,20 +129,20 @@ static char* SYSTABLE_IDX_COLUMN[] = {"table_name", "db_name", "create_time" static char* SYSTABLE_SPECIAL_COL[] = {"db_name", "vgroup_id"}; -static int32_t buildSysDbTableInfo(const SSysTableScanInfo* pInfo, int32_t capacity); -static SSDataBlock* buildInfoSchemaTableMetaBlock(char* tableName); -static void destroySysScanOperator(void* param); -static int32_t loadSysTableCallback(void* param, SDataBuf* pMsg, int32_t code); -static SSDataBlock* doFilterResult(SSDataBlock* pDataBlock, SFilterInfo* pFilterInfo); +static int32_t buildSysDbTableInfo(const SSysTableScanInfo* pInfo, int32_t capacity); +static SSDataBlock* buildInfoSchemaTableMetaBlock(char* tableName); +static void destroySysScanOperator(void* param); +static int32_t loadSysTableCallback(void* param, SDataBuf* pMsg, int32_t code); +static SSDataBlock* doFilterResult(SSDataBlock* pDataBlock, SFilterInfo* pFilterInfo); static __optSysFilter optSysGetFilterFunc(int32_t ctype, bool* reverse); static int32_t sysTableUserTagsFillOneTableTags(const SSysTableScanInfo* pInfo, SMetaReader* smrSuperTable, SMetaReader* smrChildTable, const char* dbname, const char* tableName, int32_t* pNumOfRows, const SSDataBlock* dataBlock); -static int32_t sysTableUserColsFillOneTableCols(const SSysTableScanInfo* pInfo, const char* dbname, - int32_t* pNumOfRows, const SSDataBlock* dataBlock, - char* tName, SSchemaWrapper* schemaRow, char* tableType); +static int32_t sysTableUserColsFillOneTableCols(const SSysTableScanInfo* pInfo, const char* dbname, int32_t* pNumOfRows, + const SSDataBlock* dataBlock, char* tName, SSchemaWrapper* schemaRow, + char* tableType); static void relocateAndFilterSysTagsScanResult(SSysTableScanInfo* pInfo, int32_t numOfRows, SSDataBlock* dataBlock, SFilterInfo* pFilterInfo); @@ -204,11 +204,11 @@ int32_t sysFilte__TableName(void* arg, SNode* pNode, SArray* result) { if (func == NULL) return -1; SMetaFltParam param = {.suid = 0, - .cid = 0, - .type = TSDB_DATA_TYPE_VARCHAR, - .val = pVal->datum.p, - .reverse = reverse, - .filterFunc = func}; + .cid = 0, + .type = TSDB_DATA_TYPE_VARCHAR, + .val = pVal->datum.p, + .reverse = reverse, + .filterFunc = func}; return -1; } @@ -223,11 +223,11 @@ int32_t sysFilte__CreateTime(void* arg, SNode* pNode, SArray* result) { if (func == NULL) return -1; SMetaFltParam param = {.suid = 0, - .cid = 0, - .type = TSDB_DATA_TYPE_BIGINT, - .val = &pVal->datum.i, - .reverse = reverse, - .filterFunc = func}; + .cid = 0, + .type = TSDB_DATA_TYPE_BIGINT, + .val = &pVal->datum.i, + .reverse = reverse, + .filterFunc = func}; int32_t ret = metaFilterCreateTime(pMeta, ¶m, result); return ret; @@ -355,9 +355,9 @@ static int32_t optSysMergeRslt(SArray* mRslt, SArray* rslt); static SSDataBlock* sysTableScanFromMNode(SOperatorInfo* pOperator, SSysTableScanInfo* pInfo, const char* name, SExecTaskInfo* pTaskInfo); void extractTbnameSlotId(SSysTableScanInfo* pInfo, const SScanPhysiNode* pScanNode); -static SSDataBlock* sysTableScanFillTbName(SOperatorInfo* pOperator, const SSysTableScanInfo* pInfo, - const char* name, SSDataBlock* pBlock); -__optSysFilter optSysGetFilterFunc(int32_t ctype, bool* reverse) { +static SSDataBlock* sysTableScanFillTbName(SOperatorInfo* pOperator, const SSysTableScanInfo* pInfo, const char* name, + SSDataBlock* pBlock); +__optSysFilter optSysGetFilterFunc(int32_t ctype, bool* reverse) { if (ctype == OP_TYPE_LOWER_EQUAL || ctype == OP_TYPE_LOWER_THAN) { *reverse = true; } @@ -479,13 +479,13 @@ static SSDataBlock* sysTableScanUserCols(SOperatorInfo* pOperator) { } } - char typeName[TSDB_TABLE_FNAME_LEN + VARSTR_HEADER_SIZE] = {0}; - SSchemaWrapper *schemaRow = NULL; - if(smrTable.me.type == TSDB_SUPER_TABLE){ - schemaRow = &smrTable.me.stbEntry.schemaRow; + char typeName[TSDB_TABLE_FNAME_LEN + VARSTR_HEADER_SIZE] = {0}; + SSchemaWrapper* schemaRow = NULL; + if (smrTable.me.type == TSDB_SUPER_TABLE) { + schemaRow = &smrTable.me.stbEntry.schemaRow; STR_TO_VARSTR(typeName, "CHILD_TABLE"); - }else if(smrTable.me.type == TSDB_NORMAL_TABLE){ - schemaRow = &smrTable.me.ntbEntry.schemaRow; + } else if (smrTable.me.type == TSDB_NORMAL_TABLE) { + schemaRow = &smrTable.me.ntbEntry.schemaRow; STR_TO_VARSTR(typeName, "NORMAL_TABLE"); } @@ -507,50 +507,50 @@ static SSDataBlock* sysTableScanUserCols(SOperatorInfo* pOperator) { pInfo->pCur = metaOpenTbCursor(pInfo->readHandle.meta); } - SHashObj *stableSchema = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_NO_LOCK); + SHashObj* stableSchema = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_NO_LOCK); taosHashSetFreeFp(stableSchema, tDeleteSSchemaWrapperForHash); while ((ret = metaTbCursorNext(pInfo->pCur, TSDB_TABLE_MAX)) == 0) { char typeName[TSDB_TABLE_FNAME_LEN + VARSTR_HEADER_SIZE] = {0}; char tableName[TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE] = {0}; - SSchemaWrapper *schemaRow = NULL; + SSchemaWrapper* schemaRow = NULL; - if(pInfo->pCur->mr.me.type == TSDB_SUPER_TABLE){ + if (pInfo->pCur->mr.me.type == TSDB_SUPER_TABLE) { qDebug("sysTableScanUserCols cursor get super table"); - void *schema = taosHashGet(stableSchema, &pInfo->pCur->mr.me.uid, sizeof(int64_t)); - if(schema == NULL){ - SSchemaWrapper *schemaWrapper = tCloneSSchemaWrapper(&pInfo->pCur->mr.me.stbEntry.schemaRow); + void* schema = taosHashGet(stableSchema, &pInfo->pCur->mr.me.uid, sizeof(int64_t)); + if (schema == NULL) { + SSchemaWrapper* schemaWrapper = tCloneSSchemaWrapper(&pInfo->pCur->mr.me.stbEntry.schemaRow); taosHashPut(stableSchema, &pInfo->pCur->mr.me.uid, sizeof(int64_t), &schemaWrapper, POINTER_BYTES); } continue; - }else if (pInfo->pCur->mr.me.type == TSDB_CHILD_TABLE) { + } else if (pInfo->pCur->mr.me.type == TSDB_CHILD_TABLE) { qDebug("sysTableScanUserCols cursor get child table"); STR_TO_VARSTR(typeName, "CHILD_TABLE"); STR_TO_VARSTR(tableName, pInfo->pCur->mr.me.name); int64_t suid = pInfo->pCur->mr.me.ctbEntry.suid; - void *schema = taosHashGet(stableSchema, &pInfo->pCur->mr.me.ctbEntry.suid, sizeof(int64_t)); - if(schema != NULL){ - schemaRow = *(SSchemaWrapper **)schema; - }else{ + void* schema = taosHashGet(stableSchema, &pInfo->pCur->mr.me.ctbEntry.suid, sizeof(int64_t)); + if (schema != NULL) { + schemaRow = *(SSchemaWrapper**)schema; + } else { tDecoderClear(&pInfo->pCur->mr.coder); int code = metaGetTableEntryByUid(&pInfo->pCur->mr, suid); if (code != TSDB_CODE_SUCCESS) { // terrno has been set by metaGetTableEntryByName, therefore, return directly - qError("sysTableScanUserCols get meta by suid:%"PRId64 " error, code:%d", suid, code); + qError("sysTableScanUserCols get meta by suid:%" PRId64 " error, code:%d", suid, code); blockDataDestroy(dataBlock); pInfo->loadInfo.totalRows = 0; taosHashCleanup(stableSchema); return NULL; } - schemaRow = &pInfo->pCur->mr.me.stbEntry.schemaRow; + schemaRow = &pInfo->pCur->mr.me.stbEntry.schemaRow; } - }else if(pInfo->pCur->mr.me.type == TSDB_NORMAL_TABLE){ + } else if (pInfo->pCur->mr.me.type == TSDB_NORMAL_TABLE) { qDebug("sysTableScanUserCols cursor get normal table"); - schemaRow = &pInfo->pCur->mr.me.ntbEntry.schemaRow; + schemaRow = &pInfo->pCur->mr.me.ntbEntry.schemaRow; STR_TO_VARSTR(typeName, "NORMAL_TABLE"); STR_TO_VARSTR(tableName, pInfo->pCur->mr.me.name); - }else{ + } else { qDebug("sysTableScanUserCols cursor get invalid table"); continue; } @@ -665,6 +665,7 @@ static SSDataBlock* sysTableScanUserTags(SOperatorInfo* pOperator) { pInfo->pCur = metaOpenTbCursor(pInfo->readHandle.meta); } + bool blockFull = false; while ((ret = metaTbCursorNext(pInfo->pCur, TSDB_SUPER_TABLE)) == 0) { if (pInfo->pCur->mr.me.type != TSDB_CHILD_TABLE) { continue; @@ -686,17 +687,25 @@ static SSDataBlock* sysTableScanUserTags(SOperatorInfo* pOperator) { T_LONG_JMP(pTaskInfo->env, terrno); } - sysTableUserTagsFillOneTableTags(pInfo, &smrSuperTable, &pInfo->pCur->mr, dbname, tableName, &numOfRows, dataBlock); + if ((smrSuperTable.me.stbEntry.schemaTag.nCols + numOfRows) > pOperator->resultInfo.capacity) { + metaTbCursorPrev(pInfo->pCur); + blockFull = true; + } else { + sysTableUserTagsFillOneTableTags(pInfo, &smrSuperTable, &pInfo->pCur->mr, dbname, tableName, &numOfRows, + dataBlock); + } metaReaderClear(&smrSuperTable); - if (numOfRows >= pOperator->resultInfo.capacity) { + if (blockFull || numOfRows >= pOperator->resultInfo.capacity) { relocateAndFilterSysTagsScanResult(pInfo, numOfRows, dataBlock, pOperator->exprSupp.pFilterInfo); numOfRows = 0; if (pInfo->pRes->info.rows > 0) { break; } + + blockFull = false; } } @@ -902,10 +911,10 @@ static int32_t sysTableUserTagsFillOneTableTags(const SSysTableScanInfo* pInfo, return TSDB_CODE_SUCCESS; } -static int32_t sysTableUserColsFillOneTableCols(const SSysTableScanInfo* pInfo, const char* dbname, - int32_t* pNumOfRows, const SSDataBlock* dataBlock, char* tName, - SSchemaWrapper* schemaRow, char* tableType) { - if(schemaRow == NULL){ +static int32_t sysTableUserColsFillOneTableCols(const SSysTableScanInfo* pInfo, const char* dbname, int32_t* pNumOfRows, + const SSDataBlock* dataBlock, char* tName, SSchemaWrapper* schemaRow, + char* tableType) { + if (schemaRow == NULL) { qError("sysTableUserColsFillOneTableCols schemaRow is NULL"); return TSDB_CODE_SUCCESS; } @@ -941,9 +950,8 @@ static int32_t sysTableUserColsFillOneTableCols(const SSysTableScanInfo* pInfo, colTypeLen += sprintf(varDataVal(colTypeStr) + colTypeLen, "(%d)", (int32_t)(schemaRow->pSchema[i].bytes - VARSTR_HEADER_SIZE)); } else if (colType == TSDB_DATA_TYPE_NCHAR) { - colTypeLen += sprintf( - varDataVal(colTypeStr) + colTypeLen, "(%d)", - (int32_t)((schemaRow->pSchema[i].bytes - VARSTR_HEADER_SIZE) / TSDB_NCHAR_SIZE)); + colTypeLen += sprintf(varDataVal(colTypeStr) + colTypeLen, "(%d)", + (int32_t)((schemaRow->pSchema[i].bytes - VARSTR_HEADER_SIZE) / TSDB_NCHAR_SIZE)); } varDataSetLen(colTypeStr, colTypeLen); colDataAppend(pColInfoData, numOfRows, (char*)colTypeStr, false); @@ -1550,9 +1558,9 @@ static SSDataBlock* doSysTableScan(SOperatorInfo* pOperator) { if (pInfo->showRewrite) { getDBNameFromCondition(pInfo->pCondition, dbName); sprintf(pInfo->req.db, "%d.%s", pInfo->accountId, dbName); - }else if(strncasecmp(name, TSDB_INS_TABLE_COLS, TSDB_TABLE_FNAME_LEN) == 0){ + } else if (strncasecmp(name, TSDB_INS_TABLE_COLS, TSDB_TABLE_FNAME_LEN) == 0) { getDBNameFromCondition(pInfo->pCondition, dbName); - if(dbName[0]) sprintf(pInfo->req.db, "%d.%s", pInfo->accountId, dbName); + if (dbName[0]) sprintf(pInfo->req.db, "%d.%s", pInfo->accountId, dbName); sysTableIsCondOnOneTable(pInfo->pCondition, pInfo->req.filterTb); } @@ -1573,12 +1581,12 @@ static SSDataBlock* doSysTableScan(SOperatorInfo* pOperator) { return sysTableScanFillTbName(pOperator, pInfo, name, pBlock); } -static SSDataBlock* sysTableScanFillTbName(SOperatorInfo* pOperator, const SSysTableScanInfo* pInfo, - const char* name, SSDataBlock* pBlock) { +static SSDataBlock* sysTableScanFillTbName(SOperatorInfo* pOperator, const SSysTableScanInfo* pInfo, const char* name, + SSDataBlock* pBlock) { if (pBlock != NULL) { if (pInfo->tbnameSlotId != -1) { SColumnInfoData* pColumnInfoData = (SColumnInfoData*)taosArrayGet(pBlock->pDataBlock, pInfo->tbnameSlotId); - char varTbName[TSDB_TABLE_FNAME_LEN - 1 + VARSTR_HEADER_SIZE] = {0}; + char varTbName[TSDB_TABLE_FNAME_LEN - 1 + VARSTR_HEADER_SIZE] = {0}; memcpy(varDataVal(varTbName), name, strlen(name)); varDataSetLen(varTbName, strlen(name)); for (int i = 0; i < pBlock->info.rows; ++i) { @@ -1669,7 +1677,7 @@ static SSDataBlock* sysTableScanFromMNode(SOperatorInfo* pOperator, SSysTableSca SOperatorInfo* createSysTableScanOperatorInfo(void* readHandle, SSystemTableScanPhysiNode* pScanPhyNode, const char* pUser, SExecTaskInfo* pTaskInfo) { - int32_t code = TDB_CODE_SUCCESS; + int32_t code = TDB_CODE_SUCCESS; SSysTableScanInfo* pInfo = taosMemoryCalloc(1, sizeof(SSysTableScanInfo)); SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); if (pInfo == NULL || pOperator == NULL) { @@ -1717,10 +1725,11 @@ SOperatorInfo* createSysTableScanOperatorInfo(void* readHandle, SSystemTableScan setOperatorInfo(pOperator, "SysTableScanOperator", QUERY_NODE_PHYSICAL_PLAN_SYSTABLE_SCAN, false, OP_NOT_OPENED, pInfo, pTaskInfo); pOperator->exprSupp.numOfExprs = taosArrayGetSize(pInfo->pRes->pDataBlock); - pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doSysTableScan, NULL, destroySysScanOperator, optrDefaultBufFn, NULL); + pOperator->fpSet = + createOperatorFpSet(optrDummyOpenFn, doSysTableScan, NULL, destroySysScanOperator, optrDefaultBufFn, NULL); return pOperator; - _error: +_error: if (pInfo != NULL) { destroySysScanOperator(pInfo); } @@ -1757,7 +1766,7 @@ void destroySysScanOperator(void* param) { const char* name = tNameGetTableName(&pInfo->name); if (strncasecmp(name, TSDB_INS_TABLE_TABLES, TSDB_TABLE_FNAME_LEN) == 0 || strncasecmp(name, TSDB_INS_TABLE_TAGS, TSDB_TABLE_FNAME_LEN) == 0 || - strncasecmp(name, TSDB_INS_TABLE_COLS, TSDB_TABLE_FNAME_LEN) == 0|| pInfo->pCur != NULL) { + strncasecmp(name, TSDB_INS_TABLE_COLS, TSDB_TABLE_FNAME_LEN) == 0 || pInfo->pCur != NULL) { metaCloseTbCursor(pInfo->pCur); pInfo->pCur = NULL; } @@ -2165,7 +2174,7 @@ static SSDataBlock* doBlockInfoScan(SOperatorInfo* pOperator) { // make the valgrind happy that all memory buffer has been initialized already. if (slotId != 0) { SColumnInfoData* p1 = taosArrayGet(pBlock->pDataBlock, 0); - int64_t v = 0; + int64_t v = 0; colDataAppendInt64(p1, 0, &v); } @@ -2175,10 +2184,10 @@ static SSDataBlock* doBlockInfoScan(SOperatorInfo* pOperator) { } static void destroyBlockDistScanOperatorInfo(void* param) { - SBlockDistInfo* pDistInfo = (SBlockDistInfo*)param; - blockDataDestroy(pDistInfo->pResBlock); - tsdbReaderClose(pDistInfo->pHandle); - taosMemoryFreeClear(param); + SBlockDistInfo* pDistInfo = (SBlockDistInfo*)param; + blockDataDestroy(pDistInfo->pResBlock); + tsdbReaderClose(pDistInfo->pHandle); + taosMemoryFreeClear(param); } static int32_t initTableblockDistQueryCond(uint64_t uid, SQueryTableDataCond* pCond) { @@ -2250,8 +2259,8 @@ SOperatorInfo* createDataBlockInfoScanOperator(SReadHandle* readHandle, SBlockDi setOperatorInfo(pOperator, "DataBlockDistScanOperator", QUERY_NODE_PHYSICAL_PLAN_BLOCK_DIST_SCAN, false, OP_NOT_OPENED, pInfo, pTaskInfo); - pOperator->fpSet = - createOperatorFpSet(optrDummyOpenFn, doBlockInfoScan, NULL, destroyBlockDistScanOperatorInfo, optrDefaultBufFn, NULL); + pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doBlockInfoScan, NULL, destroyBlockDistScanOperatorInfo, + optrDefaultBufFn, NULL); return pOperator; _error: diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index 224537c1f110f9d3d629320d031baf1dc6e7d108..0c491addd58a48823514ef21f6e51969827a4270 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -3053,14 +3053,12 @@ static int32_t doSaveTupleData(SSerializeDataHandle* pHandle, const void* pBuf, if (pHandle->currentPage == -1) { pPage = getNewBufPage(pHandle->pBuf, &pHandle->currentPage); if (pPage == NULL) { - terrno = TSDB_CODE_NO_AVAIL_DISK; return terrno; } pPage->num = sizeof(SFilePage); } else { pPage = getBufPage(pHandle->pBuf, pHandle->currentPage); if (pPage == NULL) { - terrno = TSDB_CODE_NO_AVAIL_DISK; return terrno; } if (pPage->num + length > getBufPageSize(pHandle->pBuf)) { @@ -3068,7 +3066,6 @@ static int32_t doSaveTupleData(SSerializeDataHandle* pHandle, const void* pBuf, releaseBufPage(pHandle->pBuf, pPage); pPage = getNewBufPage(pHandle->pBuf, &pHandle->currentPage); if (pPage == NULL) { - terrno = TSDB_CODE_NO_AVAIL_DISK; return terrno; } pPage->num = sizeof(SFilePage); @@ -3115,7 +3112,6 @@ static int32_t doUpdateTupleData(SSerializeDataHandle* pHandle, const void* pBuf if (pHandle->pBuf != NULL) { SFilePage* pPage = getBufPage(pHandle->pBuf, pPos->pageId); if (pPage == NULL) { - terrno = TSDB_CODE_NO_AVAIL_DISK; return terrno; } memcpy(pPage->data + pPos->offset, pBuf, length); diff --git a/source/libs/function/src/tpercentile.c b/source/libs/function/src/tpercentile.c index 6577858ee693257b4ddf02f3ca1ab1d1234e40a6..97fe94b513f86f2a20f4c4e42b965c67840cc043 100644 --- a/source/libs/function/src/tpercentile.c +++ b/source/libs/function/src/tpercentile.c @@ -50,8 +50,8 @@ static SFilePage *loadDataFromFilePage(tMemBucket *pMemBucket, int32_t slotIdx) if (pg == NULL) { return NULL; } - memcpy(buffer->data + offset, pg->data, (size_t)(pg->num * pMemBucket->bytes)); + memcpy(buffer->data + offset, pg->data, (size_t)(pg->num * pMemBucket->bytes)); offset += (int32_t)(pg->num * pMemBucket->bytes); } @@ -116,7 +116,7 @@ int32_t findOnlyResult(tMemBucket *pMemBucket, double *result) { int32_t *pageId = taosArrayGet(list, 0); SFilePage *pPage = getBufPage(pMemBucket->pBuffer, *pageId); if (pPage == NULL) { - return TSDB_CODE_NO_AVAIL_DISK; + return terrno; } ASSERT(pPage->num == 1); @@ -283,7 +283,7 @@ tMemBucket *tMemBucketCreate(int16_t nElemSize, int16_t dataType, double minval, return NULL; } - int32_t ret = createDiskbasedBuf(&pBucket->pBuffer, pBucket->bufPageSize, pBucket->bufPageSize * 512, "1", tsTempDir); + int32_t ret = createDiskbasedBuf(&pBucket->pBuffer, pBucket->bufPageSize, pBucket->bufPageSize * 1024, "1", tsTempDir); if (ret != 0) { tMemBucketDestroy(pBucket); return NULL; @@ -395,7 +395,7 @@ int32_t tMemBucketPut(tMemBucket *pBucket, const void *data, size_t size) { pSlot->info.data = getNewBufPage(pBucket->pBuffer, &pageId); if (pSlot->info.data == NULL) { - return TSDB_CODE_NO_AVAIL_DISK; + return terrno; } pSlot->info.pageId = pageId; taosArrayPush(pPageIdList, &pageId); @@ -489,8 +489,9 @@ int32_t getPercentileImpl(tMemBucket *pMemBucket, int32_t count, double fraction // data in buffer and file are merged together to be processed. SFilePage *buffer = loadDataFromFilePage(pMemBucket, i); if (buffer == NULL) { - return TSDB_CODE_NO_AVAIL_DISK; + return terrno; } + int32_t currentIdx = count - num; char *thisVal = buffer->data + pMemBucket->bytes * currentIdx; @@ -536,7 +537,7 @@ int32_t getPercentileImpl(tMemBucket *pMemBucket, int32_t count, double fraction int32_t *pageId = taosArrayGet(list, f); SFilePage *pg = getBufPage(pMemBucket->pBuffer, *pageId); if (pg == NULL) { - return TSDB_CODE_NO_AVAIL_DISK; + return terrno; } int32_t code = tMemBucketPut(pMemBucket, pg->data, (int32_t)pg->num); diff --git a/source/libs/qworker/inc/qwInt.h b/source/libs/qworker/inc/qwInt.h index 66b8e48eb1a41d5669e008d3296359f208e0a19f..787ef7501da7e467e67975d20563ad7f875ecced 100644 --- a/source/libs/qworker/inc/qwInt.h +++ b/source/libs/qworker/inc/qwInt.h @@ -228,9 +228,14 @@ typedef struct SQWorkerMgmt { case QW_PHASE_POST_FETCH: \ ctx->inFetch = 0; \ break; \ - default: \ + case QW_PHASE_PRE_QUERY: \ + case QW_PHASE_POST_QUERY: \ + case QW_PHASE_PRE_CQUERY: \ + case QW_PHASE_POST_CQUERY: \ atomic_store_8(&(ctx)->phase, _value); \ break; \ + default: \ + break; \ } \ } while (0) diff --git a/source/libs/qworker/src/qworker.c b/source/libs/qworker/src/qworker.c index 2f4e8001df1aeca484b6898f1ae6a94d82af44c1..2f712e6eba50a6dd3e8c4768397eaf57ec505bff 100644 --- a/source/libs/qworker/src/qworker.c +++ b/source/libs/qworker/src/qworker.c @@ -550,7 +550,9 @@ _return: if (ctx) { QW_UPDATE_RSP_CODE(ctx, code); - QW_SET_PHASE(ctx, phase); + if (QW_PHASE_POST_CQUERY != phase) { + QW_SET_PHASE(ctx, phase); + } QW_UNLOCK(QW_WRITE, &ctx->lock); qwReleaseTaskCtx(mgmt, ctx); @@ -757,7 +759,7 @@ int32_t qwProcessCQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg) { QW_LOCK(QW_WRITE, &ctx->lock); if (qComplete || (queryStop && (0 == atomic_load_8((int8_t *)&ctx->queryContinue))) || code) { // Note: query is not running anymore - QW_SET_PHASE(ctx, 0); + QW_SET_PHASE(ctx, QW_PHASE_POST_CQUERY); QW_UNLOCK(QW_WRITE, &ctx->lock); break; } diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 6f77769decdf2649b66e5cac0c8bbf823cce85e1..2f991288ffd0201be79ed3392befcd5da669294e 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -207,6 +207,7 @@ void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) { if (ppTask) { SStreamTask* pTask = *ppTask; taosHashRemove(pMeta->pTasks, &taskId, sizeof(int32_t)); + tdbTbDelete(pMeta->pTaskDb, &taskId, sizeof(int32_t), pMeta->txn); /*if (pTask->timer) { * taosTmrStop(pTask->timer);*/ /*pTask->timer = NULL;*/ diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index e77a8d4be333e1267aafb94c7da71e5a9651b385..835e5d248e345cbbb3206e35d67ddd20717009db 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -89,45 +89,6 @@ // /\ UNCHANGED <> // -int32_t syncNodeFollowerCommit(SSyncNode* ths, SyncIndex newCommitIndex) { - ASSERT(false && "deprecated"); - if (ths->state != TAOS_SYNC_STATE_FOLLOWER) { - sNTrace(ths, "can not do follower commit"); - return -1; - } - - // maybe update commit index, leader notice me - if (newCommitIndex > ths->commitIndex) { - // has commit entry in local - if (newCommitIndex <= ths->pLogStore->syncLogLastIndex(ths->pLogStore)) { - // advance commit index to sanpshot first - SSnapshot snapshot; - ths->pFsm->FpGetSnapshotInfo(ths->pFsm, &snapshot); - if (snapshot.lastApplyIndex >= 0 && snapshot.lastApplyIndex > ths->commitIndex) { - SyncIndex commitBegin = ths->commitIndex; - SyncIndex commitEnd = snapshot.lastApplyIndex; - ths->commitIndex = snapshot.lastApplyIndex; - sNTrace(ths, "commit by snapshot from index:%" PRId64 " to index:%" PRId64, commitBegin, commitEnd); - } - - SyncIndex beginIndex = ths->commitIndex + 1; - SyncIndex endIndex = newCommitIndex; - - // update commit index - ths->commitIndex = newCommitIndex; - - // call back Wal - int32_t code = ths->pLogStore->syncLogUpdateCommitIndex(ths->pLogStore, ths->commitIndex); - ASSERT(code == 0); - - code = syncNodeDoCommit(ths, beginIndex, endIndex, ths->state); - ASSERT(code == 0); - } - } - - return 0; -} - SSyncRaftEntry* syncBuildRaftEntryFromAppendEntries(const SyncAppendEntries* pMsg) { SSyncRaftEntry* pEntry = taosMemoryMalloc(pMsg->dataLen); if (pEntry == NULL) { @@ -232,256 +193,3 @@ _IGNORE: rpcFreeCont(rpcRsp.pCont); return 0; } - -int32_t syncNodeOnAppendEntriesOld(SSyncNode* ths, const SRpcMsg* pRpcMsg) { - SyncAppendEntries* pMsg = pRpcMsg->pCont; - SRpcMsg rpcRsp = {0}; - - // if already drop replica, do not process - if (!syncNodeInRaftGroup(ths, &(pMsg->srcId))) { - syncLogRecvAppendEntries(ths, pMsg, "not in my config"); - goto _IGNORE; - } - - // prepare response msg - int32_t code = syncBuildAppendEntriesReply(&rpcRsp, ths->vgId); - if (code != 0) { - syncLogRecvAppendEntries(ths, pMsg, "build rsp error"); - goto _IGNORE; - } - - SyncAppendEntriesReply* pReply = rpcRsp.pCont; - pReply->srcId = ths->myRaftId; - pReply->destId = pMsg->srcId; - pReply->term = ths->raftStore.currentTerm; - pReply->success = false; - // pReply->matchIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore); - pReply->matchIndex = SYNC_INDEX_INVALID; - pReply->lastSendIndex = pMsg->prevLogIndex + 1; - pReply->startTime = ths->startTime; - - if (pMsg->term < ths->raftStore.currentTerm) { - syncLogRecvAppendEntries(ths, pMsg, "reject, small term"); - goto _SEND_RESPONSE; - } - - if (pMsg->term > ths->raftStore.currentTerm) { - pReply->term = pMsg->term; - } - - syncNodeStepDown(ths, pMsg->term); - syncNodeResetElectTimer(ths); - - SyncIndex startIndex = ths->pLogStore->syncLogBeginIndex(ths->pLogStore); - SyncIndex lastIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore); - - if (pMsg->prevLogIndex > lastIndex) { - syncLogRecvAppendEntries(ths, pMsg, "reject, index not match"); - goto _SEND_RESPONSE; - } - - if (pMsg->prevLogIndex >= startIndex) { - SyncTerm myPreLogTerm = syncNodeGetPreTerm(ths, pMsg->prevLogIndex + 1); - // ASSERT(myPreLogTerm != SYNC_TERM_INVALID); - if (myPreLogTerm == SYNC_TERM_INVALID) { - syncLogRecvAppendEntries(ths, pMsg, "reject, pre-term invalid"); - goto _SEND_RESPONSE; - } - - if (myPreLogTerm != pMsg->prevLogTerm) { - syncLogRecvAppendEntries(ths, pMsg, "reject, pre-term not match"); - goto _SEND_RESPONSE; - } - } - - // accept - pReply->success = true; - bool hasAppendEntries = pMsg->dataLen > 0; - if (hasAppendEntries) { - SSyncRaftEntry* pAppendEntry = syncEntryBuildFromAppendEntries(pMsg); - ASSERT(pAppendEntry != NULL); - - SyncIndex appendIndex = pMsg->prevLogIndex + 1; - - LRUHandle* hLocal = NULL; - LRUHandle* hAppend = NULL; - - int32_t code = 0; - SSyncRaftEntry* pLocalEntry = NULL; - SLRUCache* pCache = ths->pLogStore->pCache; - hLocal = taosLRUCacheLookup(pCache, &appendIndex, sizeof(appendIndex)); - if (hLocal) { - pLocalEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, hLocal); - code = 0; - - ths->pLogStore->cacheHit++; - sNTrace(ths, "hit cache index:%" PRId64 ", bytes:%u, %p", appendIndex, pLocalEntry->bytes, pLocalEntry); - - } else { - ths->pLogStore->cacheMiss++; - sNTrace(ths, "miss cache index:%" PRId64, appendIndex); - - code = ths->pLogStore->syncLogGetEntry(ths->pLogStore, appendIndex, &pLocalEntry); - } - - if (code == 0) { - // get local entry success - - if (pLocalEntry->term == pAppendEntry->term) { - // do nothing - sNTrace(ths, "log match, do nothing, index:%" PRId64, appendIndex); - - } else { - // truncate - code = ths->pLogStore->syncLogTruncate(ths->pLogStore, appendIndex); - if (code != 0) { - char logBuf[128]; - snprintf(logBuf, sizeof(logBuf), "ignore, truncate error, append-index:%" PRId64, appendIndex); - syncLogRecvAppendEntries(ths, pMsg, logBuf); - - if (hLocal) { - taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false); - } else { - syncEntryDestroy(pLocalEntry); - } - - if (hAppend) { - taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false); - } else { - syncEntryDestroy(pAppendEntry); - } - - goto _IGNORE; - } - - ASSERT(pAppendEntry->index == appendIndex); - - // append - code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry); - if (code != 0) { - char logBuf[128]; - snprintf(logBuf, sizeof(logBuf), "ignore, append error, append-index:%" PRId64, appendIndex); - syncLogRecvAppendEntries(ths, pMsg, logBuf); - - if (hLocal) { - taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false); - } else { - syncEntryDestroy(pLocalEntry); - } - - if (hAppend) { - taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false); - } else { - syncEntryDestroy(pAppendEntry); - } - - goto _IGNORE; - } - - syncCacheEntry(ths->pLogStore, pAppendEntry, &hAppend); - } - - } else { - if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) { - // log not exist - - // truncate - code = ths->pLogStore->syncLogTruncate(ths->pLogStore, appendIndex); - if (code != 0) { - char logBuf[128]; - snprintf(logBuf, sizeof(logBuf), "ignore, log not exist, truncate error, append-index:%" PRId64, appendIndex); - syncLogRecvAppendEntries(ths, pMsg, logBuf); - - syncEntryDestroy(pLocalEntry); - syncEntryDestroy(pAppendEntry); - goto _IGNORE; - } - - // append - code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry); - if (code != 0) { - char logBuf[128]; - snprintf(logBuf, sizeof(logBuf), "ignore, log not exist, append error, append-index:%" PRId64, appendIndex); - syncLogRecvAppendEntries(ths, pMsg, logBuf); - - if (hLocal) { - taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false); - } else { - syncEntryDestroy(pLocalEntry); - } - - if (hAppend) { - taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false); - } else { - syncEntryDestroy(pAppendEntry); - } - - goto _IGNORE; - } - - syncCacheEntry(ths->pLogStore, pAppendEntry, &hAppend); - - } else { - // get local entry success - char logBuf[128]; - snprintf(logBuf, sizeof(logBuf), "ignore, get local entry error, append-index:%" PRId64 " err:%d", appendIndex, - terrno); - syncLogRecvAppendEntries(ths, pMsg, logBuf); - - if (hLocal) { - taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false); - } else { - syncEntryDestroy(pLocalEntry); - } - - if (hAppend) { - taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false); - } else { - syncEntryDestroy(pAppendEntry); - } - - goto _IGNORE; - } - } - - // update match index - pReply->matchIndex = pAppendEntry->index; - - if (hLocal) { - taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false); - } else { - syncEntryDestroy(pLocalEntry); - } - - if (hAppend) { - taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false); - } else { - syncEntryDestroy(pAppendEntry); - } - - } else { - // no append entries, do nothing - // maybe has extra entries, no harm - - // update match index - pReply->matchIndex = pMsg->prevLogIndex; - } - - // maybe update commit index, leader notice me - syncNodeFollowerCommit(ths, pMsg->commitIndex); - - syncLogRecvAppendEntries(ths, pMsg, "accept"); - goto _SEND_RESPONSE; - -_IGNORE: - rpcFreeCont(rpcRsp.pCont); - return 0; - -_SEND_RESPONSE: - // msg event log - syncLogSendAppendEntriesReply(ths, pReply, ""); - - // send response - syncNodeSendMsgById(&pReply->destId, ths, &rpcRsp); - return 0; -} diff --git a/source/libs/sync/src/syncAppendEntriesReply.c b/source/libs/sync/src/syncAppendEntriesReply.c index 8157a5a14f9c5275036d14f8917aebf3dfe05a26..44a29da3ea0e54d4e9932183a67d298a9c6239ed 100644 --- a/source/libs/sync/src/syncAppendEntriesReply.c +++ b/source/libs/sync/src/syncAppendEntriesReply.c @@ -89,63 +89,3 @@ int32_t syncNodeOnAppendEntriesReply(SSyncNode* ths, const SRpcMsg* pRpcMsg) { } return 0; } - -int32_t syncNodeOnAppendEntriesReplyOld(SSyncNode* ths, SyncAppendEntriesReply* pMsg) { - int32_t ret = 0; - - // if already drop replica, do not process - if (!syncNodeInRaftGroup(ths, &(pMsg->srcId))) { - syncLogRecvAppendEntriesReply(ths, pMsg, "not in my config"); - return 0; - } - - // drop stale response - if (pMsg->term < ths->raftStore.currentTerm) { - syncLogRecvAppendEntriesReply(ths, pMsg, "drop stale response"); - return 0; - } - - if (ths->state == TAOS_SYNC_STATE_LEADER) { - if (pMsg->term > ths->raftStore.currentTerm) { - syncLogRecvAppendEntriesReply(ths, pMsg, "error term"); - syncNodeStepDown(ths, pMsg->term); - return -1; - } - - ASSERT(pMsg->term == ths->raftStore.currentTerm); - - if (pMsg->success) { - SyncIndex oldMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId)); - if (pMsg->matchIndex > oldMatchIndex) { - syncIndexMgrSetIndex(ths->pMatchIndex, &(pMsg->srcId), pMsg->matchIndex); - syncMaybeAdvanceCommitIndex(ths); - - // maybe update minMatchIndex - ths->minMatchIndex = syncMinMatchIndex(ths); - } - syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), pMsg->matchIndex + 1); - - } else { - SyncIndex nextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId)); - if (nextIndex > SYNC_INDEX_BEGIN) { - --nextIndex; - } - syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), nextIndex); - } - - // send next append entries - SPeerState* pState = syncNodeGetPeerState(ths, &(pMsg->srcId)); - ASSERT(pState != NULL); - - if (pMsg->lastSendIndex == pState->lastSendIndex) { - int64_t timeNow = taosGetTimestampMs(); - int64_t elapsed = timeNow - pState->lastSendTime; - sNTrace(ths, "sync-append-entries rtt elapsed:%" PRId64 ", index:%" PRId64, elapsed, pState->lastSendIndex); - - syncNodeReplicateOne(ths, &(pMsg->srcId), true); - } - } - - syncLogRecvAppendEntriesReply(ths, pMsg, "process"); - return 0; -} diff --git a/source/libs/sync/src/syncCommit.c b/source/libs/sync/src/syncCommit.c index 286cf4daf52a0ef206ff4d2bf285b673dd1a7b1a..67ed1e0701eebefff06870af66611acdbd3bb681 100644 --- a/source/libs/sync/src/syncCommit.c +++ b/source/libs/sync/src/syncCommit.c @@ -43,148 +43,6 @@ // IN commitIndex' = [commitIndex EXCEPT ![i] = newCommitIndex] // /\ UNCHANGED <> // -void syncOneReplicaAdvance(SSyncNode* pSyncNode) { - ASSERT(false && "deprecated"); - if (pSyncNode == NULL) { - sError("pSyncNode is NULL"); - return; - } - - if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) { - sNError(pSyncNode, "not leader, can not advance commit index"); - return; - } - - if (pSyncNode->replicaNum != 1) { - sNError(pSyncNode, "not one replica, can not advance commit index"); - return; - } - - // advance commit index to snapshot first - SSnapshot snapshot; - pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapshot); - if (snapshot.lastApplyIndex > 0 && snapshot.lastApplyIndex > pSyncNode->commitIndex) { - SyncIndex commitBegin = pSyncNode->commitIndex; - SyncIndex commitEnd = snapshot.lastApplyIndex; - pSyncNode->commitIndex = snapshot.lastApplyIndex; - sNTrace(pSyncNode, "commit by snapshot from index:%" PRId64 " to index:%" PRId64, commitBegin, commitEnd); - } - - // advance commit index as large as possible - SyncIndex lastIndex = syncNodeGetLastIndex(pSyncNode); - if (lastIndex > pSyncNode->commitIndex) { - sNTrace(pSyncNode, "commit by wal from index:%" PRId64 " to index:%" PRId64, pSyncNode->commitIndex + 1, lastIndex); - pSyncNode->commitIndex = lastIndex; - } - - // call back Wal - SyncIndex walCommitVer = logStoreWalCommitVer(pSyncNode->pLogStore); - if (pSyncNode->commitIndex > walCommitVer) { - pSyncNode->pLogStore->syncLogUpdateCommitIndex(pSyncNode->pLogStore, pSyncNode->commitIndex); - } -} - -void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) { - ASSERTS(false, "deprecated"); - if (pSyncNode == NULL) { - sError("pSyncNode is NULL"); - return; - } - - if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) { - sNError(pSyncNode, "not leader, can not advance commit index"); - return; - } - - // advance commit index to sanpshot first - SSnapshot snapshot; - pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapshot); - if (snapshot.lastApplyIndex > 0 && snapshot.lastApplyIndex > pSyncNode->commitIndex) { - SyncIndex commitBegin = pSyncNode->commitIndex; - SyncIndex commitEnd = snapshot.lastApplyIndex; - pSyncNode->commitIndex = snapshot.lastApplyIndex; - sNTrace(pSyncNode, "commit by snapshot from index:%" PRId64 " to index:%" PRId64, commitBegin, commitEnd); - } - - // update commit index - SyncIndex newCommitIndex = pSyncNode->commitIndex; - for (SyncIndex index = syncNodeGetLastIndex(pSyncNode); index > pSyncNode->commitIndex; --index) { - bool agree = syncAgree(pSyncNode, index); - - if (agree) { - // term - SSyncRaftEntry* pEntry = NULL; - SLRUCache* pCache = pSyncNode->pLogStore->pCache; - LRUHandle* h = taosLRUCacheLookup(pCache, &index, sizeof(index)); - if (h) { - pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h); - - pSyncNode->pLogStore->cacheHit++; - sNTrace(pSyncNode, "hit cache index:%" PRId64 ", bytes:%u, %p", index, pEntry->bytes, pEntry); - - } else { - pSyncNode->pLogStore->cacheMiss++; - sNTrace(pSyncNode, "miss cache index:%" PRId64, index); - - int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, index, &pEntry); - if (code != 0) { - sNError(pSyncNode, "advance commit index error, read wal index:%" PRId64, index); - return; - } - } - // cannot commit, even if quorum agree. need check term! - if (pEntry->term <= pSyncNode->raftStore.currentTerm) { - // update commit index - newCommitIndex = index; - - if (h) { - taosLRUCacheRelease(pCache, h, false); - } else { - syncEntryDestroy(pEntry); - } - - break; - } else { - sNTrace(pSyncNode, "can not commit due to term not equal, index:%" PRId64 ", term:%" PRIu64, pEntry->index, - pEntry->term); - } - - if (h) { - taosLRUCacheRelease(pCache, h, false); - } else { - syncEntryDestroy(pEntry); - } - } - } - - // advance commit index as large as possible - SyncIndex walCommitVer = logStoreWalCommitVer(pSyncNode->pLogStore); - if (walCommitVer > newCommitIndex) { - newCommitIndex = walCommitVer; - } - - // maybe execute fsm - if (newCommitIndex > pSyncNode->commitIndex) { - SyncIndex beginIndex = pSyncNode->commitIndex + 1; - SyncIndex endIndex = newCommitIndex; - - // update commit index - pSyncNode->commitIndex = newCommitIndex; - - // call back Wal - pSyncNode->pLogStore->syncLogUpdateCommitIndex(pSyncNode->pLogStore, pSyncNode->commitIndex); - - // execute fsm - if (pSyncNode != NULL && pSyncNode->pFsm != NULL) { - int32_t code = syncNodeDoCommit(pSyncNode, beginIndex, endIndex, pSyncNode->state); - if (code != 0) { - sNError(pSyncNode, "advance commit index error, do commit begin:%" PRId64 ", end:%" PRId64, beginIndex, - endIndex); - return; - } - } - } -} bool syncAgreeIndex(SSyncNode* pSyncNode, SRaftId* pRaftId, SyncIndex index) { // I am leader, I agree @@ -210,83 +68,7 @@ static inline int64_t syncNodeAbs64(int64_t a, int64_t b) { return c; } -int32_t syncNodeDynamicQuorum(const SSyncNode* pSyncNode) { - return pSyncNode->quorum; - -#if 0 - int32_t quorum = 1; // self - - int64_t timeNow = taosGetTimestampMs(); - for (int i = 0; i < pSyncNode->peersNum; ++i) { - int64_t peerStartTime = syncIndexMgrGetStartTime(pSyncNode->pNextIndex, &(pSyncNode->peersId)[i]); - int64_t peerRecvTime = syncIndexMgrGetRecvTime(pSyncNode->pNextIndex, &(pSyncNode->peersId)[i]); - SyncIndex peerMatchIndex = syncIndexMgrGetIndex(pSyncNode->pMatchIndex, &(pSyncNode->peersId)[i]); - - int64_t recvTimeDiff = TABS(peerRecvTime - timeNow); - int64_t startTimeDiff = TABS(peerStartTime - pSyncNode->startTime); - int64_t logDiff = TABS(peerMatchIndex - syncNodeGetLastIndex(pSyncNode)); - - /* - int64_t recvTimeDiff = syncNodeAbs64(peerRecvTime, timeNow); - int64_t startTimeDiff = syncNodeAbs64(peerStartTime, pSyncNode->startTime); - int64_t logDiff = syncNodeAbs64(peerMatchIndex, syncNodeGetLastIndex(pSyncNode)); - */ - - int32_t addQuorum = 0; - - if (recvTimeDiff < SYNC_MAX_RECV_TIME_RANGE_MS) { - if (startTimeDiff < SYNC_MAX_START_TIME_RANGE_MS) { - addQuorum = 1; - } else { - if (logDiff < SYNC_ADD_QUORUM_COUNT) { - addQuorum = 1; - } else { - addQuorum = 0; - } - } - } else { - addQuorum = 0; - } - - /* - if (recvTimeDiff < SYNC_MAX_RECV_TIME_RANGE_MS) { - addQuorum = 1; - } else { - addQuorum = 0; - } - - if (startTimeDiff > SYNC_MAX_START_TIME_RANGE_MS) { - addQuorum = 0; - } - */ - - quorum += addQuorum; - } - - ASSERT(quorum <= pSyncNode->replicaNum); - - if (quorum < pSyncNode->quorum) { - quorum = pSyncNode->quorum; - } - - return quorum; -#endif -} - -/* -bool syncAgree(SSyncNode* pSyncNode, SyncIndex index) { - int agreeCount = 0; - for (int i = 0; i < pSyncNode->replicaNum; ++i) { - if (syncAgreeIndex(pSyncNode, &(pSyncNode->replicasId[i]), index)) { - ++agreeCount; - } - if (agreeCount >= syncNodeDynamicQuorum(pSyncNode)) { - return true; - } - } - return false; -} -*/ +int32_t syncNodeDynamicQuorum(const SSyncNode* pSyncNode) { return pSyncNode->quorum; } bool syncNodeAgreedUpon(SSyncNode* pNode, SyncIndex index) { int count = 0; diff --git a/source/libs/sync/src/syncElection.c b/source/libs/sync/src/syncElection.c index cd3ffc33e34fef0047f9e92f319a5fadaf271419..682ace83ecfa99e4781f70915048cf62a5e2d76f 100644 --- a/source/libs/sync/src/syncElection.c +++ b/source/libs/sync/src/syncElection.c @@ -43,7 +43,10 @@ static int32_t syncNodeRequestVotePeers(SSyncNode* pNode) { for (int i = 0; i < pNode->peersNum; ++i) { SRpcMsg rpcMsg = {0}; ret = syncBuildRequestVote(&rpcMsg, pNode->vgId); - ASSERT(ret == 0); + if (ret < 0) { + sError("vgId:%d, failed to build request-vote msg since %s", pNode->vgId, terrstr()); + continue; + } SyncRequestVote* pMsg = rpcMsg.pCont; pMsg->srcId = pNode->myRaftId; @@ -51,13 +54,18 @@ static int32_t syncNodeRequestVotePeers(SSyncNode* pNode) { pMsg->term = pNode->raftStore.currentTerm; ret = syncNodeGetLastIndexTerm(pNode, &pMsg->lastLogIndex, &pMsg->lastLogTerm); - ASSERT(ret == 0); + if (ret < 0) { + sError("vgId:%d, failed to get index and term of last log since %s", pNode->vgId, terrstr()); + continue; + } ret = syncNodeSendMsgById(&pNode->peersId[i], pNode, &rpcMsg); - ASSERT(ret == 0); + if (ret < 0) { + sError("vgId:%d, failed to send msg to peerId:%" PRId64, pNode->vgId, pNode->peersId[i].addr); + continue; + } } - - return ret; + return 0; } int32_t syncNodeElect(SSyncNode* pSyncNode) { diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index ac377911eb25510d0a1f8a0dd3453dc1c2b21ff8..77b87a885ba53166cbdfe9f2d792da9421803fc1 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -292,8 +292,6 @@ int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex) { goto _DEL_WAL; } else { - lastApplyIndex -= SYNC_VNODE_LOG_RETENTION; - SyncIndex beginIndex = pSyncNode->pLogStore->syncLogBeginIndex(pSyncNode->pLogStore); SyncIndex endIndex = pSyncNode->pLogStore->syncLogEndIndex(pSyncNode->pLogStore); bool isEmpty = pSyncNode->pLogStore->syncLogIsEmpty(pSyncNode->pLogStore); @@ -308,6 +306,8 @@ int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex) { if (pSyncNode->replicaNum > 1) { // multi replicas + lastApplyIndex = TMAX(lastApplyIndex - SYNC_VNODE_LOG_RETENTION, beginIndex - 1); + if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) { pSyncNode->minMatchIndex = syncMinMatchIndex(pSyncNode); @@ -586,78 +586,6 @@ SSyncState syncGetState(int64_t rid) { return state; } -#if 0 -int32_t syncGetSnapshotByIndex(int64_t rid, SyncIndex index, SSnapshot* pSnapshot) { - if (index < SYNC_INDEX_BEGIN) { - return -1; - } - - SSyncNode* pSyncNode = syncNodeAcquire(rid); - if (pSyncNode == NULL) { - return -1; - } - ASSERT(rid == pSyncNode->rid); - - SSyncRaftEntry* pEntry = NULL; - int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, index, &pEntry); - if (code != 0) { - if (pEntry != NULL) { - syncEntryDestroy(pEntry); - } - syncNodeRelease(pSyncNode); - return -1; - } - ASSERT(pEntry != NULL); - - pSnapshot->data = NULL; - pSnapshot->lastApplyIndex = index; - pSnapshot->lastApplyTerm = pEntry->term; - pSnapshot->lastConfigIndex = syncNodeGetSnapshotConfigIndex(pSyncNode, index); - - syncEntryDestroy(pEntry); - syncNodeRelease(pSyncNode); - return 0; -} - -int32_t syncGetSnapshotMeta(int64_t rid, struct SSnapshotMeta* sMeta) { - SSyncNode* pSyncNode = syncNodeAcquire(rid); - if (pSyncNode == NULL) { - return -1; - } - ASSERT(rid == pSyncNode->rid); - sMeta->lastConfigIndex = pSyncNode->raftCfg.lastConfigIndex; - - sTrace("vgId:%d, get snapshot meta, lastConfigIndex:%" PRId64, pSyncNode->vgId, pSyncNode->raftCfg.lastConfigIndex); - - syncNodeRelease(pSyncNode); - return 0; -} - -int32_t syncGetSnapshotMetaByIndex(int64_t rid, SyncIndex snapshotIndex, struct SSnapshotMeta* sMeta) { - SSyncNode* pSyncNode = syncNodeAcquire(rid); - if (pSyncNode == NULL) { - return -1; - } - ASSERT(rid == pSyncNode->rid); - - ASSERT(pSyncNode->raftCfg.configIndexCount >= 1); - SyncIndex lastIndex = (pSyncNode->raftCfg.configIndexArr)[0]; - - for (int32_t i = 0; i < pSyncNode->raftCfg.configIndexCount; ++i) { - if ((pSyncNode->raftCfg.configIndexArr)[i] > lastIndex && - (pSyncNode->raftCfg.configIndexArr)[i] <= snapshotIndex) { - lastIndex = (pSyncNode->raftCfg.configIndexArr)[i]; - } - } - sMeta->lastConfigIndex = lastIndex; - sTrace("vgId:%d, get snapshot meta by index:%" PRId64 " lcindex:%" PRId64, pSyncNode->vgId, snapshotIndex, - sMeta->lastConfigIndex); - - syncNodeRelease(pSyncNode); - return 0; -} -#endif - SyncIndex syncNodeGetSnapshotConfigIndex(SSyncNode* pSyncNode, SyncIndex snapshotLastApplyIndex) { ASSERT(pSyncNode->raftCfg.configIndexCount >= 1); SyncIndex lastIndex = (pSyncNode->raftCfg.configIndexArr)[0]; @@ -1042,9 +970,12 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) { pSyncNode->commitIndex = commitIndex; sInfo("vgId:%d, sync node commitIndex initialized as %" PRId64, pSyncNode->vgId, pSyncNode->commitIndex); + // restore log store on need if (syncNodeLogStoreRestoreOnNeed(pSyncNode) < 0) { + sError("vgId:%d, failed to restore log store since %s.", pSyncNode->vgId, terrstr()); goto _error; } + // timer ms init pSyncNode->pingBaseLine = PING_TIMER_MS; pSyncNode->electBaseLine = tsElectInterval; @@ -1107,10 +1038,16 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) { pSyncNode->changing = false; // replication mgr - syncNodeLogReplMgrInit(pSyncNode); + if (syncNodeLogReplMgrInit(pSyncNode) < 0) { + sError("vgId:%d, failed to init repl mgr since %s.", pSyncNode->vgId, terrstr()); + goto _error; + } // peer state - syncNodePeerStateInit(pSyncNode); + if (syncNodePeerStateInit(pSyncNode) < 0) { + sError("vgId:%d, failed to init peer stat since %s.", pSyncNode->vgId, terrstr()); + goto _error; + } // // min match index @@ -1205,27 +1142,10 @@ int32_t syncNodeStart(SSyncNode* pSyncNode) { int32_t ret = 0; ret = syncNodeStartPingTimer(pSyncNode); - ASSERT(ret == 0); - return ret; -} - -void syncNodeStartOld(SSyncNode* pSyncNode) { - // start raft - if (pSyncNode->replicaNum == 1) { - raftStoreNextTerm(pSyncNode); - syncNodeBecomeLeader(pSyncNode, "one replica start"); - - // Raft 3.6.2 Committing entries from previous terms - syncNodeAppendNoop(pSyncNode); - syncMaybeAdvanceCommitIndex(pSyncNode); - - } else { - syncNodeBecomeFollower(pSyncNode, "first start"); + if (ret != 0) { + sError("vgId:%d, failed to start ping timer since %s", pSyncNode->vgId, terrstr()); } - - int32_t ret = 0; - ret = syncNodeStartPingTimer(pSyncNode); - ASSERT(ret == 0); + return ret; } int32_t syncNodeStartStandBy(SSyncNode* pSyncNode) { @@ -1236,11 +1156,16 @@ int32_t syncNodeStartStandBy(SSyncNode* pSyncNode) { // reset elect timer, long enough int32_t electMS = TIMER_MAX_MS; int32_t ret = syncNodeRestartElectTimer(pSyncNode, electMS); - ASSERT(ret == 0); + if (ret < 0) { + sError("vgId:%d, failed to restart elect timer since %s", pSyncNode->vgId, terrstr()); + return -1; + } - ret = 0; ret = syncNodeStartPingTimer(pSyncNode); - ASSERT(ret == 0); + if (ret < 0) { + sError("vgId:%d, failed to start ping timer since %s", pSyncNode->vgId, terrstr()); + return -1; + } return ret; } @@ -1829,12 +1754,6 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) { pSyncNode->leaderCache = pSyncNode->myRaftId; for (int32_t i = 0; i < pSyncNode->pNextIndex->replicaNum; ++i) { - // maybe overwrite myself, no harm - // just do it! - - // pSyncNode->pNextIndex->index[i] = pSyncNode->pLogStore->getLastIndex(pSyncNode->pLogStore) + 1; - - // maybe wal is deleted SyncIndex lastIndex; SyncTerm lastTerm; int32_t code = syncNodeGetLastIndexTerm(pSyncNode, &lastIndex, &lastTerm); @@ -1896,7 +1815,11 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) { void syncNodeCandidate2Leader(SSyncNode* pSyncNode) { ASSERT(pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE); - ASSERT(voteGrantedMajority(pSyncNode->pVotesGranted)); + bool granted = voteGrantedMajority(pSyncNode->pVotesGranted); + if (!granted) { + sError("vgId:%d, not granted by majority.", pSyncNode->vgId); + return; + } syncNodeBecomeLeader(pSyncNode, "candidate to leader"); sNTrace(pSyncNode, "state change syncNodeCandidate2Leader"); @@ -1912,20 +1835,6 @@ void syncNodeCandidate2Leader(SSyncNode* pSyncNode) { pSyncNode->vgId, pSyncNode->raftStore.currentTerm, pSyncNode->commitIndex, lastIndex); } -void syncNodeCandidate2LeaderOld(SSyncNode* pSyncNode) { - ASSERT(pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE); - ASSERT(voteGrantedMajority(pSyncNode->pVotesGranted)); - syncNodeBecomeLeader(pSyncNode, "candidate to leader"); - - // Raft 3.6.2 Committing entries from previous terms - syncNodeAppendNoop(pSyncNode); - syncMaybeAdvanceCommitIndex(pSyncNode); - - if (pSyncNode->replicaNum > 1) { - syncNodeReplicate(pSyncNode); - } -} - bool syncNodeIsMnode(SSyncNode* pSyncNode) { return (pSyncNode->vgId == 1); } int32_t syncNodePeerStateInit(SSyncNode* pSyncNode) { @@ -1971,7 +1880,8 @@ void syncNodeCandidate2Follower(SSyncNode* pSyncNode) { // need assert void syncNodeVoteForTerm(SSyncNode* pSyncNode, SyncTerm term, SRaftId* pRaftId) { ASSERT(term == pSyncNode->raftStore.currentTerm); - ASSERT(!raftStoreHasVoted(pSyncNode)); + bool voted = raftStoreHasVoted(pSyncNode); + ASSERT(!voted); raftStoreVote(pSyncNode, pRaftId); } @@ -2488,7 +2398,7 @@ static int32_t syncNodeAppendNoopOld(SSyncNode* ths) { LRUHandle* h = NULL; if (ths->state == TAOS_SYNC_STATE_LEADER) { - int32_t code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pEntry); + int32_t code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pEntry, false); if (code != 0) { sError("append noop error"); return -1; @@ -2649,24 +2559,6 @@ int32_t syncNodeOnLocalCmd(SSyncNode* ths, const SRpcMsg* pRpcMsg) { return 0; } -int32_t syncNodeOnLocalCmdOld(SSyncNode* ths, const SRpcMsg* pRpcMsg) { - ASSERT(false && "deprecated"); - SyncLocalCmd* pMsg = pRpcMsg->pCont; - syncLogRecvLocalCmd(ths, pMsg, ""); - - if (pMsg->cmd == SYNC_LOCAL_CMD_STEP_DOWN) { - syncNodeStepDown(ths, pMsg->currentTerm); - - } else if (pMsg->cmd == SYNC_LOCAL_CMD_FOLLOWER_CMT) { - syncNodeFollowerCommit(ths, pMsg->commitIndex); - - } else { - sError("error local cmd"); - } - - return 0; -} - // TLA+ Spec // ClientRequest(i, v) == // /\ state[i] = Leader @@ -2711,96 +2603,6 @@ int32_t syncNodeOnClientRequest(SSyncNode* ths, SRpcMsg* pMsg, SyncIndex* pRetIn } } -int32_t syncNodeOnClientRequestOld(SSyncNode* ths, SRpcMsg* pMsg, SyncIndex* pRetIndex) { - sNTrace(ths, "on client request"); - - int32_t ret = 0; - int32_t code = 0; - - SyncIndex index = ths->pLogStore->syncLogWriteIndex(ths->pLogStore); - SyncTerm term = ths->raftStore.currentTerm; - SSyncRaftEntry* pEntry; - - if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) { - pEntry = syncEntryBuildFromClientRequest(pMsg->pCont, term, index); - } else { - pEntry = syncEntryBuildFromRpcMsg(pMsg, term, index); - } - - LRUHandle* h = NULL; - - if (ths->state == TAOS_SYNC_STATE_LEADER) { - // append entry - code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pEntry); - if (code != 0) { - if (ths->replicaNum == 1) { - if (h) { - taosLRUCacheRelease(ths->pLogStore->pCache, h, false); - } else { - syncEntryDestroy(pEntry); - } - - return -1; - - } else { - // del resp mgr, call FpCommitCb - SFsmCbMeta cbMeta = { - .index = pEntry->index, - .lastConfigIndex = SYNC_INDEX_INVALID, - .isWeak = pEntry->isWeak, - .code = -1, - .state = ths->state, - .seqNum = pEntry->seqNum, - .term = pEntry->term, - .currentTerm = ths->raftStore.currentTerm, - .flag = 0, - }; - ths->pFsm->FpCommitCb(ths->pFsm, pMsg, &cbMeta); - - if (h) { - taosLRUCacheRelease(ths->pLogStore->pCache, h, false); - } else { - syncEntryDestroy(pEntry); - } - - return -1; - } - } - - syncCacheEntry(ths->pLogStore, pEntry, &h); - - // if mulit replica, start replicate right now - if (ths->replicaNum > 1) { - syncNodeReplicate(ths); - } - - // if only myself, maybe commit right now - if (ths->replicaNum == 1) { - if (syncNodeIsMnode(ths)) { - syncMaybeAdvanceCommitIndex(ths); - } else { - syncOneReplicaAdvance(ths); - } - } - } - - if (pRetIndex != NULL) { - if (ret == 0 && pEntry != NULL) { - *pRetIndex = pEntry->index; - } else { - *pRetIndex = SYNC_INDEX_INVALID; - } - } - - if (h) { - taosLRUCacheRelease(ths->pLogStore->pCache, h, false); - } else { - syncEntryDestroy(pEntry); - } - - return ret; -} - const char* syncStr(ESyncState state) { switch (state) { case TAOS_SYNC_STATE_FOLLOWER: @@ -2905,129 +2707,6 @@ bool syncNodeIsOptimizedOneReplica(SSyncNode* ths, SRpcMsg* pMsg) { return (ths->replicaNum == 1 && syncUtilUserCommit(pMsg->msgType) && ths->vgId != 1); } -int32_t syncNodeDoCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, uint64_t flag) { - ASSERT(false); - if (beginIndex > endIndex) { - return 0; - } - - if (ths == NULL) { - return -1; - } - - if (ths->pFsm != NULL && ths->pFsm->FpGetSnapshotInfo != NULL) { - // advance commit index to sanpshot first - SSnapshot snapshot = {0}; - ths->pFsm->FpGetSnapshotInfo(ths->pFsm, &snapshot); - if (snapshot.lastApplyIndex >= 0 && snapshot.lastApplyIndex >= beginIndex) { - sNTrace(ths, "commit by snapshot from index:%" PRId64 " to index:%" PRId64, beginIndex, snapshot.lastApplyIndex); - - // update begin index - beginIndex = snapshot.lastApplyIndex + 1; - } - } - - int32_t code = 0; - ESyncState state = flag; - - sNTrace(ths, "commit by wal from index:%" PRId64 " to index:%" PRId64, beginIndex, endIndex); - - // execute fsm - if (ths->pFsm != NULL) { - for (SyncIndex i = beginIndex; i <= endIndex; ++i) { - if (i != SYNC_INDEX_INVALID) { - SSyncRaftEntry* pEntry; - SLRUCache* pCache = ths->pLogStore->pCache; - LRUHandle* h = taosLRUCacheLookup(pCache, &i, sizeof(i)); - if (h) { - pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h); - - ths->pLogStore->cacheHit++; - sNTrace(ths, "hit cache index:%" PRId64 ", bytes:%u, %p", i, pEntry->bytes, pEntry); - - } else { - ths->pLogStore->cacheMiss++; - sNTrace(ths, "miss cache index:%" PRId64, i); - - code = ths->pLogStore->syncLogGetEntry(ths->pLogStore, i, &pEntry); - // ASSERT(code == 0); - // ASSERT(pEntry != NULL); - if (code != 0 || pEntry == NULL) { - sNError(ths, "get log entry error"); - sFatal("vgId:%d, get log entry %" PRId64 " error when commit since %s", ths->vgId, i, terrstr()); - continue; - } - } - - SRpcMsg rpcMsg = {0}; - syncEntry2OriginalRpc(pEntry, &rpcMsg); - - sTrace("do commit index:%" PRId64 ", type:%s", i, TMSG_INFO(pEntry->msgType)); - - // user commit - if ((ths->pFsm->FpCommitCb != NULL) && syncUtilUserCommit(pEntry->originalRpcType)) { - bool internalExecute = true; - if ((ths->replicaNum == 1) && ths->restoreFinish && ths->vgId != 1) { - internalExecute = false; - } - - sNTrace(ths, "user commit index:%" PRId64 ", internal:%d, type:%s", i, internalExecute, - TMSG_INFO(pEntry->msgType)); - - // execute fsm in apply thread, or execute outside syncPropose - if (internalExecute) { - SFsmCbMeta cbMeta = { - .index = pEntry->index, - .lastConfigIndex = syncNodeGetSnapshotConfigIndex(ths, pEntry->index), - .isWeak = pEntry->isWeak, - .code = 0, - .state = ths->state, - .seqNum = pEntry->seqNum, - .term = pEntry->term, - .currentTerm = ths->raftStore.currentTerm, - .flag = flag, - }; - - syncRespMgrGetAndDel(ths->pSyncRespMgr, cbMeta.seqNum, &rpcMsg.info); - ths->pFsm->FpCommitCb(ths->pFsm, &rpcMsg, &cbMeta); - } - } - -#if 0 - // execute in pre-commit - // leader transfer - if (pEntry->originalRpcType == TDMT_SYNC_LEADER_TRANSFER) { - code = syncDoLeaderTransfer(ths, &rpcMsg, pEntry); - ASSERT(code == 0); - } -#endif - - // restore finish - // if only snapshot, a noop entry will be append, so syncLogLastIndex is always ok - if (pEntry->index == ths->pLogStore->syncLogLastIndex(ths->pLogStore)) { - if (ths->restoreFinish == false) { - if (ths->pFsm->FpRestoreFinishCb != NULL) { - ths->pFsm->FpRestoreFinishCb(ths->pFsm); - } - ths->restoreFinish = true; - - int64_t restoreDelay = taosGetTimestampMs() - ths->leaderTime; - sNTrace(ths, "restore finish, index:%" PRId64 ", elapsed:%" PRId64 " ms", pEntry->index, restoreDelay); - } - } - - rpcFreeCont(rpcMsg.pCont); - if (h) { - taosLRUCacheRelease(pCache, h, false); - } else { - syncEntryDestroy(pEntry); - } - } - } - } - return 0; -} - bool syncNodeInRaftGroup(SSyncNode* ths, SRaftId* pRaftId) { for (int32_t i = 0; i < ths->replicaNum; ++i) { if (syncUtilSameId(&((ths->replicasId)[i]), pRaftId)) { diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index b61fc2e90dc8cadd8265346a0bbe38e7a533238c..6cc517fda00c67d6d89dcb2b49d40049a05ea18e 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -364,7 +364,11 @@ _out: return ret; } -int32_t syncLogStorePersist(SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry) { +static inline bool syncLogStoreNeedFlush(SSyncRaftEntry* pEntry, int32_t replicaNum) { + return (replicaNum > 1) && (pEntry->originalRpcType == TDMT_VND_COMMIT); +} + +int32_t syncLogStorePersist(SSyncLogStore* pLogStore, SSyncNode* pNode, SSyncRaftEntry* pEntry) { ASSERT(pEntry->index >= 0); SyncIndex lastVer = pLogStore->syncLogLastIndex(pLogStore); if (lastVer >= pEntry->index && pLogStore->syncLogTruncate(pLogStore, pEntry->index) < 0) { @@ -374,7 +378,8 @@ int32_t syncLogStorePersist(SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry) { lastVer = pLogStore->syncLogLastIndex(pLogStore); ASSERT(pEntry->index == lastVer + 1); - if (pLogStore->syncLogAppendEntry(pLogStore, pEntry) < 0) { + bool doFsync = syncLogStoreNeedFlush(pEntry, pNode->replicaNum); + if (pLogStore->syncLogAppendEntry(pLogStore, pEntry, doFsync) < 0) { sError("failed to append sync log entry since %s. index:%" PRId64 ", term:%" PRId64 "", terrstr(), pEntry->index, pEntry->term); return -1; @@ -436,7 +441,7 @@ int64_t syncLogBufferProceed(SSyncLogBuffer* pBuf, SSyncNode* pNode, SyncTerm* p (void)syncNodeReplicateWithoutLock(pNode); // persist - if (syncLogStorePersist(pLogStore, pEntry) < 0) { + if (syncLogStorePersist(pLogStore, pNode, pEntry) < 0) { sError("vgId:%d, failed to persist sync log entry from buffer since %s. index:%" PRId64, pNode->vgId, terrstr(), pEntry->index); goto _out; @@ -940,8 +945,11 @@ int32_t syncNodeLogReplMgrInit(SSyncNode* pNode) { for (int i = 0; i < TSDB_MAX_REPLICA; i++) { ASSERT(pNode->logReplMgrs[i] == NULL); pNode->logReplMgrs[i] = syncLogReplMgrCreate(); + if (pNode->logReplMgrs[i] == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } pNode->logReplMgrs[i]->peerId = i; - ASSERTS(pNode->logReplMgrs[i] != NULL, "Out of memory."); } return 0; } diff --git a/source/libs/sync/src/syncRaftLog.c b/source/libs/sync/src/syncRaftLog.c index ca6d3c314fbeb63fe2eafa3b93bcdc08f936da3e..e6569d99741f762c593c557751e800f11b6c150e 100644 --- a/source/libs/sync/src/syncRaftLog.c +++ b/source/libs/sync/src/syncRaftLog.c @@ -23,7 +23,7 @@ // public function static int32_t raftLogRestoreFromSnapshot(struct SSyncLogStore* pLogStore, SyncIndex snapshotIndex); -static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry); +static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry, bool forceSync); static int32_t raftLogTruncate(struct SSyncLogStore* pLogStore, SyncIndex fromIndex); static bool raftLogExist(struct SSyncLogStore* pLogStore, SyncIndex index); static int32_t raftLogUpdateCommitIndex(SSyncLogStore* pLogStore, SyncIndex index); @@ -192,9 +192,7 @@ SyncTerm raftLogLastTerm(struct SSyncLogStore* pLogStore) { return SYNC_TERM_INVALID; } -static inline bool raftLogForceSync(SSyncRaftEntry* pEntry) { return (pEntry->originalRpcType == TDMT_VND_COMMIT); } - -static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry) { +static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry, bool forceSync) { SSyncLogStoreData* pData = pLogStore->data; SWal* pWal = pData->pWal; @@ -221,7 +219,6 @@ static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntr ASSERT(pEntry->index == index); - bool forceSync = raftLogForceSync(pEntry); walFsync(pWal, forceSync); sNTrace(pData->pSyncNode, "write index:%" PRId64 ", type:%s, origin type:%s, elapsed:%" PRId64, pEntry->index, diff --git a/source/libs/sync/src/syncReplication.c b/source/libs/sync/src/syncReplication.c index 1aa476e84e03ab46a925085ff7792ca88b0af5b4..3df203221b88bbd0b9d804613e2ea50e881be149 100644 --- a/source/libs/sync/src/syncReplication.c +++ b/source/libs/sync/src/syncReplication.c @@ -48,92 +48,6 @@ int32_t syncNodeMaybeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, SRpcMsg* pRpcMsg); -int32_t syncNodeReplicateOne(SSyncNode* pSyncNode, SRaftId* pDestId, bool snapshot) { - ASSERT(false && "deprecated"); - // next index - SyncIndex nextIndex = syncIndexMgrGetIndex(pSyncNode->pNextIndex, pDestId); - - if (snapshot) { - // maybe start snapshot - SyncIndex logStartIndex = pSyncNode->pLogStore->syncLogBeginIndex(pSyncNode->pLogStore); - SyncIndex logEndIndex = pSyncNode->pLogStore->syncLogEndIndex(pSyncNode->pLogStore); - if (nextIndex < logStartIndex || nextIndex - 1 > logEndIndex) { - sNTrace(pSyncNode, "maybe start snapshot for next-index:%" PRId64 ", start:%" PRId64 ", end:%" PRId64, nextIndex, - logStartIndex, logEndIndex); - // start snapshot - int32_t code = syncNodeStartSnapshot(pSyncNode, pDestId); - } - } - - // pre index, pre term - SyncIndex preLogIndex = syncNodeGetPreIndex(pSyncNode, nextIndex); - SyncTerm preLogTerm = syncNodeGetPreTerm(pSyncNode, nextIndex); - - // prepare entry - SRpcMsg rpcMsg = {0}; - SyncAppendEntries* pMsg = NULL; - - SSyncRaftEntry* pEntry = NULL; - SLRUCache* pCache = pSyncNode->pLogStore->pCache; - LRUHandle* h = taosLRUCacheLookup(pCache, &nextIndex, sizeof(nextIndex)); - int32_t code = 0; - if (h) { - pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h); - code = 0; - - pSyncNode->pLogStore->cacheHit++; - sNTrace(pSyncNode, "hit cache index:%" PRId64 ", bytes:%u, %p", nextIndex, pEntry->bytes, pEntry); - - } else { - pSyncNode->pLogStore->cacheMiss++; - sNTrace(pSyncNode, "miss cache index:%" PRId64, nextIndex); - - code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, nextIndex, &pEntry); - } - - if (code == 0) { - ASSERT(pEntry != NULL); - - code = syncBuildAppendEntries(&rpcMsg, (int32_t)(pEntry->bytes), pSyncNode->vgId); - ASSERT(code == 0); - - pMsg = rpcMsg.pCont; - memcpy(pMsg->data, pEntry, pEntry->bytes); - } else { - if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) { - // no entry in log - code = syncBuildAppendEntries(&rpcMsg, 0, pSyncNode->vgId); - ASSERT(code == 0); - - pMsg = rpcMsg.pCont; - } else { - sNError(pSyncNode, "replicate to dnode:%d error, next-index:%" PRId64, DID(pDestId), nextIndex); - return -1; - } - } - - if (h) { - taosLRUCacheRelease(pCache, h, false); - } else { - syncEntryDestroy(pEntry); - } - - // prepare msg - ASSERT(pMsg != NULL); - pMsg->srcId = pSyncNode->myRaftId; - pMsg->destId = *pDestId; - pMsg->term = pSyncNode->raftStore.currentTerm; - pMsg->prevLogIndex = preLogIndex; - pMsg->prevLogTerm = preLogTerm; - pMsg->commitIndex = pSyncNode->commitIndex; - pMsg->privateTerm = 0; - // pMsg->privateTerm = syncIndexMgrGetTerm(pSyncNode->pNextIndex, pDestId); - - // send msg - syncNodeMaybeSendAppendEntries(pSyncNode, pDestId, &rpcMsg); - return 0; -} - int32_t syncNodeReplicate(SSyncNode* pNode) { SSyncLogBuffer* pBuf = pNode->pLogBuf; taosThreadMutexLock(&pBuf->mutex); @@ -156,25 +70,6 @@ int32_t syncNodeReplicateWithoutLock(SSyncNode* pNode) { return 0; } -int32_t syncNodeReplicateOld(SSyncNode* pSyncNode) { - if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) { - return -1; - } - - sNTrace(pSyncNode, "do replicate"); - - int32_t ret = 0; - for (int i = 0; i < pSyncNode->peersNum; ++i) { - SRaftId* pDestId = &(pSyncNode->peersId[i]); - ret = syncNodeReplicateOne(pSyncNode, pDestId, true); - if (ret != 0) { - sError("vgId:%d, do append entries error for dnode:%d", pSyncNode->vgId, DID(pDestId)); - } - } - - return 0; -} - int32_t syncNodeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, SRpcMsg* pRpcMsg) { SyncAppendEntries* pMsg = pRpcMsg->pCont; pMsg->destId = *destRaftId; @@ -182,39 +77,6 @@ int32_t syncNodeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftI return 0; } -int32_t syncNodeSendAppendEntriesOld(SSyncNode* pSyncNode, const SRaftId* destRaftId, SRpcMsg* pRpcMsg) { - int32_t ret = 0; - SyncAppendEntries* pMsg = pRpcMsg->pCont; - if (pMsg == NULL) { - sError("vgId:%d, sync-append-entries msg is NULL", pSyncNode->vgId); - return 0; - } - - SPeerState* pState = syncNodeGetPeerState(pSyncNode, destRaftId); - if (pState == NULL) { - sError("vgId:%d, replica maybe dropped", pSyncNode->vgId); - return 0; - } - - // save index, otherwise pMsg will be free by rpc - SyncIndex saveLastSendIndex = pState->lastSendIndex; - bool update = false; - if (pMsg->dataLen > 0) { - saveLastSendIndex = pMsg->prevLogIndex + 1; - update = true; - } - - syncLogSendAppendEntries(pSyncNode, pMsg, ""); - syncNodeSendMsgById(destRaftId, pSyncNode, pRpcMsg); - - if (update) { - pState->lastSendIndex = saveLastSendIndex; - pState->lastSendTime = taosGetTimestampMs(); - } - - return ret; -} - int32_t syncNodeMaybeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, SRpcMsg* pRpcMsg) { int32_t ret = 0; SyncAppendEntries* pMsg = pRpcMsg->pCont; diff --git a/source/libs/wal/src/walMeta.c b/source/libs/wal/src/walMeta.c index 4a0570bee34cafb95f2ce389fa48dbde831383c0..07109883dbc580b29ad84923b2e861ac8724ff0a 100644 --- a/source/libs/wal/src/walMeta.c +++ b/source/libs/wal/src/walMeta.c @@ -322,6 +322,35 @@ bool walLogEntriesComplete(const SWal* pWal) { return complete; } +int walTrimIdxFile(SWal* pWal, int32_t fileIdx) { + SWalFileInfo* pFileInfo = taosArrayGet(pWal->fileInfoSet, fileIdx); + ASSERT(pFileInfo != NULL); + char fnameStr[WAL_FILE_LEN]; + walBuildIdxName(pWal, pFileInfo->firstVer, fnameStr); + + int64_t fileSize = 0; + taosStatFile(fnameStr, &fileSize, NULL); + int64_t records = TMAX(0, pFileInfo->lastVer - pFileInfo->firstVer + 1); + int64_t lastEndOffset = records * sizeof(SWalIdxEntry); + + if (fileSize <= lastEndOffset) { + return 0; + } + + TdFilePtr pFile = taosOpenFile(fnameStr, TD_FILE_READ | TD_FILE_WRITE); + if (pFile == NULL) { + terrno = TAOS_SYSTEM_ERROR(errno); + return -1; + } + + wInfo("vgId:%d, trim idx file. file: %s, size: %" PRId64 ", offset: %" PRId64, pWal->cfg.vgId, fnameStr, fileSize, + lastEndOffset); + + taosFtruncateFile(pFile, lastEndOffset); + taosCloseFile(&pFile); + return 0; +} + int walCheckAndRepairMeta(SWal* pWal) { // load log files, get first/snapshot/last version info const char* logPattern = "^[0-9]+.log$"; @@ -396,6 +425,8 @@ int walCheckAndRepairMeta(SWal* pWal) { } updateMeta = true; + (void)walTrimIdxFile(pWal, fileIdx); + int64_t lastVer = walScanLogGetLastVer(pWal, fileIdx); if (lastVer < 0) { if (terrno != TSDB_CODE_WAL_LOG_NOT_EXIST) { @@ -558,6 +589,7 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) { goto _err; } + int64_t count = 0; while (idxEntry.ver < pFileInfo->lastVer) { /*A(idxEntry.ver == ckHead.head.version);*/ @@ -569,11 +601,11 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) { idxEntry.offset, fLogNameStr); goto _err; } - wWarn("vgId:%d, wal idx append new entry %" PRId64 " %" PRId64, pWal->cfg.vgId, idxEntry.ver, idxEntry.offset); if (taosWriteFile(pIdxFile, &idxEntry, sizeof(SWalIdxEntry)) < 0) { wError("vgId:%d, failed to append file since %s. file:%s", pWal->cfg.vgId, terrstr(), fnameStr); goto _err; } + count++; } if (taosFsyncFile(pIdxFile) < 0) { @@ -581,6 +613,11 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) { goto _err; } + if (count > 0) { + wInfo("vgId:%d, rebuilt %" PRId64 " wal idx entries until lastVer: %" PRId64, pWal->cfg.vgId, count, + pFileInfo->lastVer); + } + (void)taosCloseFile(&pLogFile); (void)taosCloseFile(&pIdxFile); return 0; diff --git a/source/libs/wal/src/walRef.c b/source/libs/wal/src/walRef.c index fa04ba3e58a05c3ee902bde8ace004e821683f37..d3c03c335b2e570b18d78321bd3c2690e1756251 100644 --- a/source/libs/wal/src/walRef.c +++ b/source/libs/wal/src/walRef.c @@ -77,6 +77,31 @@ void walUnrefVer(SWalRef *pRef) { } #endif +SWalRef *walRefFirstVer(SWal *pWal, SWalRef *pRef) { + if (pRef == NULL) { + pRef = walOpenRef(pWal); + if (pRef == NULL) { + return NULL; + } + } + taosThreadMutexLock(&pWal->mutex); + + int64_t ver = walGetFirstVer(pWal); + + wDebug("vgId:%d, wal ref version %" PRId64 " for first", pWal->cfg.vgId, ver); + + pRef->refVer = ver; + // bsearch in fileSet + SWalFileInfo tmpInfo; + tmpInfo.firstVer = ver; + SWalFileInfo *pRet = taosArraySearch(pWal->fileInfoSet, &tmpInfo, compareWalFileInfo, TD_LE); + ASSERT(pRet != NULL); + pRef->refFile = pRet->firstVer; + + taosThreadMutexUnlock(&pWal->mutex); + return pRef; +} + SWalRef *walRefCommittedVer(SWal *pWal) { SWalRef *pRef = walOpenRef(pWal); if (pRef == NULL) { @@ -87,6 +112,8 @@ SWalRef *walRefCommittedVer(SWal *pWal) { int64_t ver = walGetCommittedVer(pWal); + wDebug("vgId:%d, wal ref version %" PRId64 " for committed", pWal->cfg.vgId, ver); + pRef->refVer = ver; // bsearch in fileSet SWalFileInfo tmpInfo; diff --git a/source/os/CMakeLists.txt b/source/os/CMakeLists.txt index 3aac5e97751295be13956283c2e8dae46f4c453e..2a4fcbcf7690f1b141062f6566e45199f23715b9 100644 --- a/source/os/CMakeLists.txt +++ b/source/os/CMakeLists.txt @@ -41,7 +41,7 @@ target_link_libraries( ) if(TD_WINDOWS) target_link_libraries( - os PUBLIC ws2_32 iconv msvcregex wcwidth winmm crashdump dbghelp + os PUBLIC ws2_32 iconv msvcregex wcwidth winmm crashdump dbghelp version ) elseif(TD_DARWIN_64) find_library(CORE_FOUNDATION_FRAMEWORK CoreFoundation) diff --git a/source/os/src/osMath.c b/source/os/src/osMath.c index dddadd5ff6c187621a1b37efb24882ef84448cb9..3b42c141dfe661ebc121eaa25083e3095fdae1ed 100644 --- a/source/os/src/osMath.c +++ b/source/os/src/osMath.c @@ -15,8 +15,8 @@ #define ALLOW_FORBID_FUNC #define _DEFAULT_SOURCE -#include "os.h" #include +#include "os.h" #ifdef WINDOWS void swapStr(char* j, char* J, int width) { @@ -32,7 +32,17 @@ void swapStr(char* j, char* J, int width) { } #endif +int32_t qsortHelper(const void* p1, const void* p2, const void* param) { + __compar_fn_t comparFn = param; + return comparFn(p1, p2); +} + // todo refactor: 1) move away; 2) use merge sort instead; 3) qsort is not a stable sort actually. -void taosSort(void* arr, int64_t sz, int64_t width, __compar_fn_t compar) { - qsort(arr, sz, width, compar); +void taosSort(void* base, int64_t sz, int64_t width, __compar_fn_t compar) { +#ifdef _ALPINE + void* param = compar; + taosqsort(base, width, sz, param, qsortHelper); +#else + qsort(base, sz, width, compar); +#endif } diff --git a/source/os/src/osSysinfo.c b/source/os/src/osSysinfo.c index b915e2964fce236a70108887a78fb71288266535..b5c6edc829568b4fcc6bed96415ba7d630fbb084 100644 --- a/source/os/src/osSysinfo.c +++ b/source/os/src/osSysinfo.c @@ -280,11 +280,46 @@ int32_t taosGetEmail(char *email, int32_t maxLen) { #endif } +#ifdef WINDOWS +bool getWinVersionReleaseName(char *releaseName, int32_t maxLen) { + TCHAR szFileName[MAX_PATH]; + DWORD dwHandle; + DWORD dwLen; + LPVOID lpData; + UINT uLen; + VS_FIXEDFILEINFO *pFileInfo; + + GetWindowsDirectory(szFileName, MAX_PATH); + wsprintf(szFileName, L"%s%s", szFileName, L"\\explorer.exe"); + dwLen = GetFileVersionInfoSize(szFileName, &dwHandle); + if (dwLen == 0) { + return false; + } + + lpData = malloc(dwLen); + if (lpData == NULL) return false; + if (!GetFileVersionInfo(szFileName, dwHandle, dwLen, lpData)) { + free(lpData); + return false; + } + if (!VerQueryValue(lpData, L"\\", (LPVOID *)&pFileInfo, &uLen)) { + free(lpData); + return false; + } + + snprintf(releaseName, maxLen, "Windows %d.%d", HIWORD(pFileInfo->dwProductVersionMS), + LOWORD(pFileInfo->dwProductVersionMS)); + free(lpData); + return true; +} +#endif int32_t taosGetOsReleaseName(char *releaseName, int32_t maxLen) { #ifdef WINDOWS - snprintf(releaseName, maxLen, "Windows"); + if (!getWinVersionReleaseName(releaseName, maxLen)) { + snprintf(releaseName, maxLen, "Windows"); + } return 0; #elif defined(_TD_DARWIN_64) char osversion[32]; @@ -840,7 +875,11 @@ int32_t taosGetSystemUUID(char *uid, int32_t uidlen) { uuid_generate(uuid); // it's caller's responsibility to make enough space for `uid`, that's 36-char + 1-null uuid_unparse_lower(uuid, buf); - memcpy(uid, buf, uidlen); + int n = snprintf(uid, uidlen, "%.*s", (int)sizeof(buf), buf); // though less performance, much safer + if (n >= uidlen) { + // target buffer is too small + return -1; + } return 0; #else int len = 0; diff --git a/source/os/test/osTests.cpp b/source/os/test/osTests.cpp index 2e24bb05269ba9a4529b16726ca8c59d6de7ac68..1d6542e78cd987e5228c94609cf0dd865a6d6396 100644 --- a/source/os/test/osTests.cpp +++ b/source/os/test/osTests.cpp @@ -34,6 +34,12 @@ TEST(osTest, osSystem) { ELogLevel level = DEBUG_FATAL; int32_t dflag = 255; // tsLogEmbedded ? 255 : uDebugFlag taosPrintTrace(flags, level, dflag, 0); + + const int sysLen = 64; + char osSysName[sysLen]; + int ret = taosGetOsReleaseName(osSysName, sysLen); + printf("os systeme name:%s\n", osSysName); + ASSERT_EQ(ret, 0); } void fileOperateOnFree(void *param) { diff --git a/source/util/src/talgo.c b/source/util/src/talgo.c index d9319485b7c3bbed717c054f6d63f91ca2220063..a06aac6afe3a64dcb9e53a7580c797f4d90a06a9 100644 --- a/source/util/src/talgo.c +++ b/source/util/src/talgo.c @@ -41,12 +41,6 @@ static void median(void *src, int64_t size, int64_t s, int64_t e, const void *pa ASSERT(comparFn(elePtrAt(src, size, mid), elePtrAt(src, size, s), param) <= 0 && comparFn(elePtrAt(src, size, s), elePtrAt(src, size, e), param) <= 0); - -#ifdef _DEBUG_VIEW -// tTagsPrints(src[s], pOrderDesc->pColumnModel, &pOrderDesc->orderIdx); -// tTagsPrints(src[mid], pOrderDesc->pColumnModel, &pOrderDesc->orderIdx); -// tTagsPrints(src[e], pOrderDesc->pColumnModel, &pOrderDesc->orderIdx); -#endif } static void tInsertSort(void *src, int64_t size, int32_t s, int32_t e, const void *param, __ext_compar_fn_t comparFn, @@ -278,14 +272,4 @@ void taosheapsort(void *base, int32_t size, int32_t len, const void *parcompar, } taosMemoryFree(buf); - /* - char *buf = taosMemoryCalloc(1, size); - - for (i = len - 1; i > 0; i--) { - doswap(elePtrAt(base, size, 0), elePtrAt(base, size, i)); - taosheapadjust(base, size, 0, i - 1, parcompar, compar, parswap, swap, maxroot); - } - - taosMemoryFreeClear(buf); - */ } diff --git a/source/util/src/tarray.c b/source/util/src/tarray.c index 6920925e5f82b16ba6d213ffbe03d6fa40d21d06..c083ce2f7f577349eea7ced1bcb325f8ecacdf99 100644 --- a/source/util/src/tarray.c +++ b/source/util/src/tarray.c @@ -20,7 +20,10 @@ // todo refactor API SArray* taosArrayInit(size_t size, size_t elemSize) { - assert(elemSize > 0); + if (elemSize == 0) { + terrno = TSDB_CODE_INVALID_PARA; + return NULL; + } if (size < TARRAY_MIN_SIZE) { size = TARRAY_MIN_SIZE; @@ -96,8 +99,6 @@ void* taosArrayAddBatch(SArray* pArray, const void* pData, int32_t nEles) { } void taosArrayRemoveDuplicate(SArray* pArray, __compar_fn_t comparFn, void (*fp)(void*)) { - assert(pArray); - size_t size = pArray->size; if (size <= 1) { return; @@ -136,8 +137,6 @@ void taosArrayRemoveDuplicate(SArray* pArray, __compar_fn_t comparFn, void (*fp) } void taosArrayRemoveDuplicateP(SArray* pArray, __compar_fn_t comparFn, void (*fp)(void*)) { - assert(pArray); - size_t size = pArray->size; if (size <= 1) { return; @@ -197,11 +196,10 @@ void* taosArrayReserve(SArray* pArray, int32_t num) { } void* taosArrayPop(SArray* pArray) { - assert(pArray != NULL); - if (pArray->size == 0) { return NULL; } + pArray->size -= 1; return TARRAY_GET_ELEM(pArray, pArray->size); } @@ -210,16 +208,21 @@ void* taosArrayGet(const SArray* pArray, size_t index) { if (NULL == pArray) { return NULL; } - assert(index < pArray->size); + + if (index >= pArray->size) { + uError("index is out of range, current:%"PRIzu" max:%d", index, pArray->capacity); + return NULL; + } + return TARRAY_GET_ELEM(pArray, index); } void* taosArrayGetP(const SArray* pArray, size_t index) { - assert(index < pArray->size); - - void* d = TARRAY_GET_ELEM(pArray, index); - - return *(void**)d; + void** p = taosArrayGet(pArray, index); + if (p == NULL) { + return NULL; + } + return *p; } void* taosArrayGetLast(const SArray* pArray) { return TARRAY_GET_ELEM(pArray, pArray->size - 1); } @@ -312,9 +315,12 @@ void taosArrayRemoveBatch(SArray* pArray, size_t index, size_t num, FDelete fp) } SArray* taosArrayFromList(const void* src, size_t size, size_t elemSize) { - assert(src != NULL && elemSize > 0); - SArray* pDst = taosArrayInit(size, elemSize); + if (elemSize <= 0) { + terrno = TSDB_CODE_INVALID_PARA; + return NULL; + } + SArray* pDst = taosArrayInit(size, elemSize); memcpy(pDst->pData, src, elemSize * size); pDst->size = size; @@ -322,8 +328,6 @@ SArray* taosArrayFromList(const void* src, size_t size, size_t elemSize) { } SArray* taosArrayDup(const SArray* pSrc, __array_item_dup_fn_t fn) { - assert(pSrc != NULL); - if (pSrc->size == 0) { // empty array list return taosArrayInit(8, pSrc->elemSize); } @@ -415,14 +419,10 @@ void taosArrayDestroyEx(SArray* pArray, FDelete fp) { } void taosArraySort(SArray* pArray, __compar_fn_t compar) { - ASSERT(pArray != NULL && compar != NULL); taosSort(pArray->pData, pArray->size, pArray->elemSize, compar); } void* taosArraySearch(const SArray* pArray, const void* key, __compar_fn_t comparFn, int32_t flags) { - assert(pArray != NULL && comparFn != NULL); - assert(key != NULL); - return taosbsearch(key, pArray->pData, pArray->size, pArray->elemSize, comparFn, flags); } diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c index 34ad9ae6bc39753c897d932993c917848dba64ee..62f074db5b9e28ffe71ca7ec3f4506b619ad20b1 100644 --- a/source/util/src/tlog.c +++ b/source/util/src/tlog.c @@ -897,6 +897,7 @@ void taosLogCrashInfo(char* nodeType, char* pMsg, int64_t msgLen, int signum, vo pFile = taosOpenFile(filepath, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND); if (pFile == NULL) { + terrno = TAOS_SYSTEM_ERROR(errno); taosPrintLog(flags, level, dflag, "failed to open file:%s since %s", filepath, terrstr()); goto _return; } diff --git a/source/util/src/tpagedbuf.c b/source/util/src/tpagedbuf.c index 87b44b2d1337ab157e4499f5165abf0868069bc5..7c60862c56b8544dc815723aa5427dc604a0fff5 100644 --- a/source/util/src/tpagedbuf.c +++ b/source/util/src/tpagedbuf.c @@ -5,7 +5,10 @@ #include "thash.h" #include "tlog.h" -#define GET_DATA_PAYLOAD(_p) ((char*)(_p)->pData + POINTER_BYTES) +#define GET_PAYLOAD_DATA(_p) ((char*)(_p)->pData + POINTER_BYTES) +#define BUF_PAGE_IN_MEM(_p) ((_p)->pData != NULL) +#define CLEAR_BUF_PAGE_IN_MEM_FLAG(_p) ((_p)->pData = NULL) +#define HAS_DATA_IN_DISK(_p) ((_p)->offset >= 0) #define NO_IN_MEM_AVAILABLE_PAGES(_b) (listNEles((_b)->lruList) >= (_b)->inMemPages) typedef struct SPageDiskInfo { @@ -14,7 +17,7 @@ typedef struct SPageDiskInfo { } SPageDiskInfo, SFreeListItem; struct SPageInfo { - SListNode* pn; // point to list node struct + SListNode* pn; // point to list node struct. it is NULL when the page is evicted from the in-memory buffer void* pData; int64_t offset; int32_t pageId; @@ -89,7 +92,7 @@ static char* doDecompressData(void* data, int32_t srcSize, int32_t* dst, SDiskba return data; } -static uint64_t allocatePositionInFile(SDiskbasedBuf* pBuf, size_t size) { +static uint64_t allocateNewPositionInFile(SDiskbasedBuf* pBuf, size_t size) { if (pBuf->pFree == NULL) { return pBuf->nextPos; } else { @@ -112,10 +115,6 @@ static uint64_t allocatePositionInFile(SDiskbasedBuf* pBuf, size_t size) { } } -static void setPageNotInBuf(SPageInfo* pPageInfo) { pPageInfo->pData = NULL; } - -static FORCE_INLINE size_t getAllocPageSize(int32_t pageSize) { return pageSize + POINTER_BYTES + sizeof(SFilePage); } - /** * +--------------------------+-------------------+--------------+ * | PTR to SPageInfo (8bytes)| Payload (PageSize)| 2 Extra Bytes| @@ -124,23 +123,31 @@ static FORCE_INLINE size_t getAllocPageSize(int32_t pageSize) { return pageSize * @param pg * @return */ -static char* doFlushPageToDisk(SDiskbasedBuf* pBuf, SPageInfo* pg) { - ASSERT(!pg->used && pg->pData != NULL); + +static FORCE_INLINE size_t getAllocPageSize(int32_t pageSize) { return pageSize + POINTER_BYTES + sizeof(SFilePage); } + +static char* doFlushBufPage(SDiskbasedBuf* pBuf, SPageInfo* pg) { + if (pg->pData == NULL || pg->used) { + uError("invalid params in paged buffer process when flushing buf to disk, %s", pBuf->id); + terrno = TSDB_CODE_INVALID_PARA; + return NULL; + } int32_t size = pBuf->pageSize; char* t = NULL; - if (pg->offset == -1 || pg->dirty) { - void* payload = GET_DATA_PAYLOAD(pg); + if ((!HAS_DATA_IN_DISK(pg)) || pg->dirty) { + void* payload = GET_PAYLOAD_DATA(pg); t = doCompressData(payload, pBuf->pageSize, &size, pBuf); - ASSERTS(size >= 0, "size is negative"); + if (size < 0) { + uError("failed to compress data when flushing data to disk, %s", pBuf->id); + return NULL; + } } // this page is flushed to disk for the first time if (pg->dirty) { - if (pg->offset == -1) { - ASSERTS(pg->dirty == true, "pg->dirty is false"); - - pg->offset = allocatePositionInFile(pBuf, size); + if (!HAS_DATA_IN_DISK(pg)) { + pg->offset = allocateNewPositionInFile(pBuf, size); pBuf->nextPos += size; int32_t ret = taosLSeekFile(pBuf->pFile, pg->offset, SEEK_SET); @@ -155,6 +162,7 @@ static char* doFlushPageToDisk(SDiskbasedBuf* pBuf, SPageInfo* pg) { return NULL; } + // extend the file size if (pBuf->fileSize < pg->offset + size) { pBuf->fileSize = pg->offset + size; } @@ -169,7 +177,7 @@ static char* doFlushPageToDisk(SDiskbasedBuf* pBuf, SPageInfo* pg) { taosArrayPush(pBuf->pFree, &dinfo); // 2. allocate new position, and update the info - pg->offset = allocatePositionInFile(pBuf, size); + pg->offset = allocateNewPositionInFile(pBuf, size); pBuf->nextPos += size; } @@ -197,20 +205,19 @@ static char* doFlushPageToDisk(SDiskbasedBuf* pBuf, SPageInfo* pg) { size = pg->length; } - ASSERT(size > 0 || (pg->offset == -1 && pg->length == -1)); - char* pDataBuf = pg->pData; memset(pDataBuf, 0, getAllocPageSize(pBuf->pageSize)); + #ifdef BUF_PAGE_DEBUG uDebug("page_flush %p, pageId:%d, offset:%d", pDataBuf, pg->pageId, pg->offset); #endif + pg->length = size; // on disk size return pDataBuf; } -static char* flushPageToDisk(SDiskbasedBuf* pBuf, SPageInfo* pg) { +static char* flushBufPage(SDiskbasedBuf* pBuf, SPageInfo* pg) { int32_t ret = TSDB_CODE_SUCCESS; - ASSERT(((int64_t)pBuf->numOfPages * pBuf->pageSize) == pBuf->totalBufSize && pBuf->numOfPages >= pBuf->inMemPages); if (pBuf->pFile == NULL) { if ((ret = createDiskFile(pBuf)) != TSDB_CODE_SUCCESS) { @@ -219,22 +226,27 @@ static char* flushPageToDisk(SDiskbasedBuf* pBuf, SPageInfo* pg) { } } - char* p = doFlushPageToDisk(pBuf, pg); - setPageNotInBuf(pg); - pg->dirty = false; + char* p = doFlushBufPage(pBuf, pg); + CLEAR_BUF_PAGE_IN_MEM_FLAG(pg); + pg->dirty = false; return p; } // load file block data in disk static int32_t loadPageFromDisk(SDiskbasedBuf* pBuf, SPageInfo* pg) { + if (pg->offset < 0 || pg->length <= 0) { + uError("failed to load buf page from disk, offset:%"PRId64", length:%d, %s", pg->offset, pg->length, pBuf->id); + return TSDB_CODE_INVALID_PARA; + } + int32_t ret = taosLSeekFile(pBuf->pFile, pg->offset, SEEK_SET); if (ret == -1) { ret = TAOS_SYSTEM_ERROR(errno); return ret; } - void* pPage = (void*)GET_DATA_PAYLOAD(pg); + void* pPage = (void*)GET_PAYLOAD_DATA(pg); ret = (int32_t)taosReadFile(pBuf->pFile, pPage, pg->length); if (ret != pg->length) { ret = TAOS_SYSTEM_ERROR(errno); @@ -249,10 +261,14 @@ static int32_t loadPageFromDisk(SDiskbasedBuf* pBuf, SPageInfo* pg) { return 0; } -static SPageInfo* registerPage(SDiskbasedBuf* pBuf, int32_t pageId) { +static SPageInfo* registerNewPageInfo(SDiskbasedBuf* pBuf, int32_t pageId) { pBuf->numOfPages += 1; SPageInfo* ppi = taosMemoryMalloc(sizeof(SPageInfo)); + if (ppi == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } ppi->pageId = pageId; ppi->pData = NULL; @@ -272,46 +288,33 @@ static SListNode* getEldestUnrefedPage(SDiskbasedBuf* pBuf) { SListNode* pn = NULL; while ((pn = tdListNext(&iter)) != NULL) { SPageInfo* pageInfo = *(SPageInfo**)pn->data; - ASSERT(pageInfo->pageId >= 0 && pageInfo->pn == pn); + + SPageInfo* p = *(SPageInfo**)(pageInfo->pData); + ASSERT(pageInfo->pageId >= 0 && pageInfo->pn == pn && p == pageInfo); if (!pageInfo->used) { - // printf("%d is chosen\n", pageInfo->pageId); break; - } else { - // printf("page %d is used, dirty:%d\n", pageInfo->pageId, pageInfo->dirty); } } return pn; } -static char* evacOneDataPage(SDiskbasedBuf* pBuf) { - char* bufPage = NULL; +static char* evictBufPage(SDiskbasedBuf* pBuf) { SListNode* pn = getEldestUnrefedPage(pBuf); - terrno = 0; - - // all pages are referenced by user, try to allocate new space - if (pn == NULL) { - int32_t prev = pBuf->inMemPages; - - // increase by 50% of previous mem pages - pBuf->inMemPages = (int32_t)(pBuf->inMemPages * 1.5f); - - // qWarn("%p in memory buf page not sufficient, expand from %d to %d, page size:%d", pBuf, prev, - // pBuf->inMemPages, pBuf->pageSize); - } else { - tdListPopNode(pBuf->lruList, pn); + if (pn == NULL) { // no available buffer pages now, return. + return NULL; + } - SPageInfo* d = *(SPageInfo**)pn->data; - ASSERTS(d->pn == pn, "d->pn not equal pn"); + terrno = 0; + tdListPopNode(pBuf->lruList, pn); - d->pn = NULL; - taosMemoryFreeClear(pn); + SPageInfo* d = *(SPageInfo**)pn->data; - bufPage = flushPageToDisk(pBuf, d); - } + d->pn = NULL; + taosMemoryFreeClear(pn); - return bufPage; + return flushBufPage(pBuf, d); } static void lruListPushFront(SList* pList, SPageInfo* pi) { @@ -338,13 +341,12 @@ int32_t createDiskbasedBuf(SDiskbasedBuf** pBuf, int32_t pagesize, int32_t inMem SDiskbasedBuf* pPBuf = *pBuf; if (pPBuf == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; + goto _error; } pPBuf->pageSize = pagesize; pPBuf->numOfPages = 0; // all pages are in buffer in the first place pPBuf->totalBufSize = 0; - pPBuf->inMemPages = inMemBufSize / pagesize; // maximum allowed pages, it is a soft limit. pPBuf->allocateId = -1; pPBuf->pFile = NULL; pPBuf->id = strdup(id); @@ -353,33 +355,69 @@ int32_t createDiskbasedBuf(SDiskbasedBuf** pBuf, int32_t pagesize, int32_t inMem pPBuf->freePgList = tdListNew(POINTER_BYTES); // at least more than 2 pages must be in memory - ASSERT(inMemBufSize >= pagesize * 2); + if (inMemBufSize < pagesize * 2) { + inMemBufSize = pagesize * 2; + } + pPBuf->inMemPages = inMemBufSize / pagesize; // maximum allowed pages, it is a soft limit. pPBuf->lruList = tdListNew(POINTER_BYTES); + if (pPBuf->lruList == NULL) { + goto _error; + } // init id hash table _hash_fn_t fn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT); pPBuf->pIdList = taosArrayInit(4, POINTER_BYTES); + if (pPBuf->pIdList == NULL) { + goto _error; + } pPBuf->assistBuf = taosMemoryMalloc(pPBuf->pageSize + 2); // EXTRA BYTES + if (pPBuf->assistBuf == NULL) { + goto _error; + } + pPBuf->all = taosHashInit(10, fn, true, false); - pPBuf->prefix = (char*) dir; + if (pPBuf->all == NULL) { + goto _error; + } + pPBuf->prefix = (char*) dir; pPBuf->emptyDummyIdList = taosArrayInit(1, sizeof(int32_t)); // qDebug("QInfo:0x%"PRIx64" create resBuf for output, page size:%d, inmem buf pages:%d, file:%s", qId, - // pPBuf->pageSize, - // pPBuf->inMemPages, pPBuf->path); + // pPBuf->pageSize, pPBuf->inMemPages, pPBuf->path); return TSDB_CODE_SUCCESS; + _error: + destroyDiskbasedBuf(pPBuf); + return TSDB_CODE_OUT_OF_MEMORY; +} + +static char* doExtractPage(SDiskbasedBuf* pBuf) { + char* availablePage = NULL; + if (NO_IN_MEM_AVAILABLE_PAGES(pBuf)) { + availablePage = evictBufPage(pBuf); + if (availablePage == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + uWarn("no available buf pages, current:%d, max:%d", listNEles(pBuf->lruList), pBuf->inMemPages) + } + } else { + availablePage = taosMemoryCalloc(1, getAllocPageSize(pBuf->pageSize)); // add extract bytes in case of zipped buffer increased. + if (availablePage == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + } + } + + return availablePage; } void* getNewBufPage(SDiskbasedBuf* pBuf, int32_t* pageId) { pBuf->statis.getPages += 1; - char* availablePage = NULL; - if (NO_IN_MEM_AVAILABLE_PAGES(pBuf)) { - availablePage = evacOneDataPage(pBuf); + char* availablePage = doExtractPage(pBuf); + if (availablePage == NULL) { + return NULL; } SPageInfo* pi = NULL; @@ -394,7 +432,10 @@ void* getNewBufPage(SDiskbasedBuf* pBuf, int32_t* pageId) { *pageId = (++pBuf->allocateId); // register page id info - pi = registerPage(pBuf, *pageId); + pi = registerNewPageInfo(pBuf, *pageId); + if (pi == NULL) { + return NULL; + } // add to hash map taosHashPut(pBuf->all, pageId, sizeof(int32_t), &pi, POINTER_BYTES); @@ -402,63 +443,62 @@ void* getNewBufPage(SDiskbasedBuf* pBuf, int32_t* pageId) { } // add to LRU list - ASSERT(listNEles(pBuf->lruList) < pBuf->inMemPages && pBuf->inMemPages > 0); lruListPushFront(pBuf->lruList, pi); - - // allocate buf - if (availablePage == NULL) { - pi->pData = - taosMemoryCalloc(1, getAllocPageSize(pBuf->pageSize)); // add extract bytes in case of zipped buffer increased. - } else { - pi->pData = availablePage; - } + pi->pData = availablePage; ((void**)pi->pData)[0] = pi; #ifdef BUF_PAGE_DEBUG uDebug("page_getNewBufPage , pi->pData:%p, pageId:%d, offset:%" PRId64, pi->pData, pi->pageId, pi->offset); #endif - return (void*)(GET_DATA_PAYLOAD(pi)); + + return (void*)(GET_PAYLOAD_DATA(pi)); } void* getBufPage(SDiskbasedBuf* pBuf, int32_t id) { - ASSERT(pBuf != NULL && id >= 0); + if (id < 0) { + terrno = TSDB_CODE_INVALID_PARA; + uError("invalid page id:%d, %s", id, pBuf->id); + return NULL; + } + pBuf->statis.getPages += 1; SPageInfo** pi = taosHashGet(pBuf->all, &id, sizeof(int32_t)); - ASSERT(pi != NULL && *pi != NULL); + if (pi == NULL || *pi == NULL) { + uError("failed to locate the buffer page:%d, %s", id, pBuf->id); + terrno = TSDB_CODE_INVALID_PARA; + return NULL; + } - if ((*pi)->pData != NULL) { // it is in memory + if (BUF_PAGE_IN_MEM(*pi)) { // it is in memory // no need to update the LRU list if only one page exists if (pBuf->numOfPages == 1) { (*pi)->used = true; - return (void*)(GET_DATA_PAYLOAD(*pi)); + return (void*)(GET_PAYLOAD_DATA(*pi)); } SPageInfo** pInfo = (SPageInfo**)((*pi)->pn->data); - ASSERT(*pInfo == *pi); + if (*pInfo != *pi) { + uError("inconsistently data in paged buffer, pInfo:%p, pi:%p, %s", *pInfo, *pi, pBuf->id); + return NULL; + } lruListMoveToFront(pBuf->lruList, (*pi)); (*pi)->used = true; + #ifdef BUF_PAGE_DEBUG uDebug("page_getBufPage1 pageId:%d, offset:%" PRId64, (*pi)->pageId, (*pi)->offset); #endif - return (void*)(GET_DATA_PAYLOAD(*pi)); + return (void*)(GET_PAYLOAD_DATA(*pi)); } else { // not in memory - ASSERT((*pi)->pData == NULL && (*pi)->pn == NULL && + ASSERT((!BUF_PAGE_IN_MEM(*pi)) && (*pi)->pn == NULL && (((*pi)->length >= 0 && (*pi)->offset >= 0) || ((*pi)->length == -1 && (*pi)->offset == -1))); - char* availablePage = NULL; - if (NO_IN_MEM_AVAILABLE_PAGES(pBuf)) { - availablePage = evacOneDataPage(pBuf); - if (availablePage == NULL) { - return NULL; - } - } + (*pi)->pData = doExtractPage(pBuf); - if (availablePage == NULL) { - (*pi)->pData = taosMemoryCalloc(1, getAllocPageSize(pBuf->pageSize)); - } else { - (*pi)->pData = availablePage; + // failed to evict buffer page, return with error code. + if ((*pi)->pData == NULL) { + return NULL; } // set the ptr to the new SPageInfo @@ -468,23 +508,25 @@ void* getBufPage(SDiskbasedBuf* pBuf, int32_t id) { (*pi)->used = true; // some data has been flushed to disk, and needs to be loaded into buffer again. - if ((*pi)->length > 0 && (*pi)->offset >= 0) { + if (HAS_DATA_IN_DISK(*pi)) { int32_t code = loadPageFromDisk(pBuf, *pi); if (code != 0) { + terrno = code; return NULL; } } #ifdef BUF_PAGE_DEBUG uDebug("page_getBufPage2 pageId:%d, offset:%" PRId64, (*pi)->pageId, (*pi)->offset); #endif - return (void*)(GET_DATA_PAYLOAD(*pi)); + return (void*)(GET_PAYLOAD_DATA(*pi)); } } void releaseBufPage(SDiskbasedBuf* pBuf, void* page) { - if (ASSERTS(pBuf != NULL && page != NULL, "pBuf or page is NULL")) { + if (page == NULL) { return; } + SPageInfo* ppi = getPageInfoFromPayload(page); releaseBufPageInfo(pBuf, ppi); } @@ -493,7 +535,13 @@ void releaseBufPageInfo(SDiskbasedBuf* pBuf, SPageInfo* pi) { #ifdef BUF_PAGE_DEBUG uDebug("page_releaseBufPageInfo pageId:%d, used:%d, offset:%" PRId64, pi->pageId, pi->used, pi->offset); #endif - if (ASSERTS(pi->pData != NULL, "pi->pData is NULL")) { + + if (pi == NULL) { + return; + } + + if (pi->pData == NULL) { + uError("pi->pData (page data) is null"); return; } @@ -504,7 +552,6 @@ void releaseBufPageInfo(SDiskbasedBuf* pBuf, SPageInfo* pi) { size_t getTotalBufSize(const SDiskbasedBuf* pBuf) { return (size_t)pBuf->totalBufSize; } SArray* getDataBufPagesIdList(SDiskbasedBuf* pBuf) { - ASSERT(pBuf != NULL); return pBuf->pIdList; } @@ -582,7 +629,6 @@ SPageInfo* getLastPageInfo(SArray* pList) { } int32_t getPageId(const SPageInfo* pPgInfo) { - ASSERT(pPgInfo != NULL); return pPgInfo->pageId; } diff --git a/tests/develop-test/2-query/show_create_db.py b/tests/develop-test/2-query/show_create_db.py new file mode 100644 index 0000000000000000000000000000000000000000..e5a79074ef2a1b9881f230b3eba2011edca517af --- /dev/null +++ b/tests/develop-test/2-query/show_create_db.py @@ -0,0 +1,82 @@ +import sys +from util.log import * +from util.cases import * +from util.sql import * +from util.dnodes import tdDnodes +from math import inf + +class TDTestCase: + def caseDescription(self): + ''' + case1: [TD-11204]Difference improvement that can ignore negative + ''' + return + + def init(self, conn, logSql, replicaVer=1): + tdLog.debug("start to execute %s" % __file__) + tdSql.init(conn.cursor(), False) + self._conn = conn + + def restartTaosd(self, index=1, dbname="db"): + tdDnodes.stop(index) + tdDnodes.startWithoutSleep(index) + tdSql.execute(f"use scd") + + def run(self): + print("running {}".format(__file__)) + tdSql.execute("drop database if exists scd") + tdSql.execute("create database if not exists scd") + tdSql.execute('use scd') + tdSql.execute('create table stb1 (ts timestamp, c1 bool, c2 tinyint, c3 smallint, c4 int, c5 bigint, c6 float, c7 double, c8 binary(10), c9 nchar(10), c10 tinyint unsigned, c11 smallint unsigned, c12 int unsigned, c13 bigint unsigned) TAGS(t1 int, t2 binary(10), t3 double);') + + tdSql.execute("create table tb1 using stb1 tags(1,'1',1.0);") + + tdSql.execute("create table tb2 using stb1 tags(2,'2',2.0);") + + tdSql.execute("create table tb3 using stb1 tags(3,'3',3.0);") + + tdSql.execute('create database scd2 stt_trigger 3;') + + tdSql.execute('create database scd4 stt_trigger 13;') + + tdSql.query('show create database scd;') + tdSql.checkRows(1) + tdSql.checkData(0, 0, 'scd') + tdSql.checkData(0, 1, "CREATE DATABASE `scd` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 1 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0") + + tdSql.query('show create database scd2;') + tdSql.checkRows(1) + tdSql.checkData(0, 0, 'scd2') + tdSql.checkData(0, 1, "CREATE DATABASE `scd2` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 3 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0") + + tdSql.query('show create database scd4') + tdSql.checkRows(1) + tdSql.checkData(0, 0, 'scd4') + tdSql.checkData(0, 1, "CREATE DATABASE `scd4` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 13 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0") + + + self.restartTaosd(1, dbname='scd') + + tdSql.query('show create database scd;') + tdSql.checkRows(1) + tdSql.checkData(0, 0, 'scd') + tdSql.checkData(0, 1, "CREATE DATABASE `scd` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 1 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0") + + tdSql.query('show create database scd2;') + tdSql.checkRows(1) + tdSql.checkData(0, 0, 'scd2') + tdSql.checkData(0, 1, "CREATE DATABASE `scd2` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 3 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0") + + tdSql.query('show create database scd4') + tdSql.checkRows(1) + tdSql.checkData(0, 0, 'scd4') + tdSql.checkData(0, 1, "CREATE DATABASE `scd4` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 13 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0") + + + tdSql.execute('drop database scd') + def stop(self): + tdSql.close() + tdLog.success("%s successfully executed" % __file__) + +tdCases.addWindows(__file__, TDTestCase()) +tdCases.addLinux(__file__, TDTestCase()) diff --git a/tests/develop-test/5-taos-tools/taosbenchmark/json/sml_json_alltypes.json b/tests/develop-test/5-taos-tools/taosbenchmark/json/sml_json_alltypes.json index 62ec0ff9bfff3d847e785b285b102ae5fecf8c2f..26a08b3fee7af72361229b944de7fefe7f8a1317 100644 --- a/tests/develop-test/5-taos-tools/taosbenchmark/json/sml_json_alltypes.json +++ b/tests/develop-test/5-taos-tools/taosbenchmark/json/sml_json_alltypes.json @@ -16,13 +16,7 @@ "databases": [{ "dbinfo": { "name": "db", - "drop": "yes", - "replica": 1, - "precision": "ms", - "keep": 36500, - "minRows": 100, - "maxRows": 4096, - "comp":2 + "drop": "yes" }, "super_tables": [{ "name": "stb1", @@ -37,7 +31,7 @@ "line_protocol": "json", "childtable_limit": 0, "childtable_offset": 0, - "insert_rows": 10, + "insert_rows": 20, "insert_interval": 0, "interlace_rows": 0, "disorder_ratio": 0, @@ -62,7 +56,7 @@ "line_protocol": "json", "childtable_limit": 0, "childtable_offset": 0, - "insert_rows": 10, + "insert_rows": 20, "insert_interval": 0, "interlace_rows": 0, "disorder_ratio": 0, @@ -87,7 +81,7 @@ "line_protocol": "json", "childtable_limit": 0, "childtable_offset": 0, - "insert_rows": 10, + "insert_rows": 20, "insert_interval": 0, "interlace_rows": 0, "disorder_ratio": 0, @@ -112,7 +106,7 @@ "line_protocol": "json", "childtable_limit": 0, "childtable_offset": 0, - "insert_rows": 10, + "insert_rows": 20, "insert_interval": 0, "interlace_rows": 0, "disorder_ratio": 0, @@ -137,7 +131,7 @@ "line_protocol": "json", "childtable_limit": 0, "childtable_offset": 0, - "insert_rows": 10, + "insert_rows": 20, "insert_interval": 0, "interlace_rows": 0, "disorder_ratio": 0, @@ -162,7 +156,7 @@ "line_protocol": "json", "childtable_limit": 0, "childtable_offset": 0, - "insert_rows": 10, + "insert_rows": 20, "insert_interval": 0, "interlace_rows": 0, "disorder_ratio": 0, @@ -187,7 +181,7 @@ "line_protocol": "json", "childtable_limit": 0, "childtable_offset": 0, - "insert_rows": 10, + "insert_rows": 20, "insert_interval": 0, "interlace_rows": 0, "disorder_ratio": 0, @@ -212,7 +206,7 @@ "line_protocol": "json", "childtable_limit": 0, "childtable_offset": 0, - "insert_rows": 10, + "insert_rows": 20, "insert_interval": 0, "interlace_rows": 0, "disorder_ratio": 0, @@ -237,7 +231,7 @@ "line_protocol": "json", "childtable_limit": 0, "childtable_offset": 0, - "insert_rows": 10, + "insert_rows": 20, "insert_interval": 0, "interlace_rows": 0, "disorder_ratio": 0, diff --git a/tests/develop-test/5-taos-tools/taosbenchmark/sml_json_alltypes.py b/tests/develop-test/5-taos-tools/taosbenchmark/sml_json_alltypes.py index 789a0ce0a2aaf195665f3ab483428cbca346b1b0..0d10741331578f678198ad8dac597fe56a6c0709 100644 --- a/tests/develop-test/5-taos-tools/taosbenchmark/sml_json_alltypes.py +++ b/tests/develop-test/5-taos-tools/taosbenchmark/sml_json_alltypes.py @@ -19,32 +19,38 @@ from util.dnodes import * class TDTestCase: def caseDescription(self): - ''' + """ [TD-11510] taosBenchmark test cases - ''' - return + """ def init(self, conn, logSql, replicaVar=1): - self.replicaVar = int(replicaVar) tdLog.debug("start to execute %s" % __file__) + self.replicaVar = int(replicaVar) tdSql.init(conn.cursor(), logSql) def getPath(self, tool="taosBenchmark"): selfPath = os.path.dirname(os.path.realpath(__file__)) - if ("community" in selfPath): - projPath = selfPath[:selfPath.find("community")] + if "community" in selfPath: + projPath = selfPath[: selfPath.find("community")] + elif "src" in selfPath: + projPath = selfPath[: selfPath.find("src")] + elif "/tools/" in selfPath: + projPath = selfPath[: selfPath.find("/tools/")] + elif "/tests/" in selfPath: + projPath = selfPath[: selfPath.find("/tests/")] else: - projPath = selfPath[:selfPath.find("tests")] + tdLog.info("cannot found %s in path: %s, use system's" % (tool, selfPath)) + projPath = "/usr/local/taos/bin/" paths = [] - for root, dirs, files in os.walk(projPath): - if ((tool) in files): + for root, dummy, files in os.walk(projPath): + if (tool) in files: rootRealPath = os.path.dirname(os.path.realpath(root)) - if ("packaging" not in rootRealPath): + if "packaging" not in rootRealPath: paths.append(os.path.join(root, tool)) break - if (len(paths) == 0): + if len(paths) == 0: tdLog.exit("taosBenchmark not found!") return else: @@ -52,49 +58,63 @@ class TDTestCase: return paths[0] def run(self): + tdSql.query("select client_version()") + client_ver = "".join(tdSql.queryResult[0]) + major_ver = client_ver.split(".")[0] + binPath = self.getPath() - cmd = "%s -f ./5-taos-tools/taosbenchmark/json/sml_json_alltypes.json" %binPath + cmd = "%s -f ./5-taos-tools/taosbenchmark/json/sml_json_alltypes.json" % binPath tdLog.info("%s" % cmd) os.system("%s" % cmd) tdSql.execute("reset query cache") tdSql.query("describe db.stb1") tdSql.checkData(1, 1, "BOOL") tdSql.query("describe db.stb2") - tdSql.checkData(1, 1, "TINYINT") + tdSql.checkData(1, 1, "DOUBLE") tdSql.query("describe db.stb3") - tdSql.checkData(1, 1, "SMALLINT") + tdSql.checkData(1, 1, "DOUBLE") tdSql.query("describe db.stb4") - tdSql.checkData(1, 1, "INT") + tdSql.checkData(1, 1, "DOUBLE") tdSql.query("describe db.stb5") - tdSql.checkData(1, 1, "BIGINT") + tdSql.checkData(1, 1, "DOUBLE") tdSql.query("describe db.stb6") - tdSql.checkData(1, 1, "FLOAT") + tdSql.checkData(1, 1, "DOUBLE") tdSql.query("describe db.stb7") tdSql.checkData(1, 1, "DOUBLE") tdSql.query("describe db.stb8") - tdSql.checkData(1, 1, "VARCHAR") - tdSql.checkData(1, 2, 16) + if major_ver == "3": + tdSql.checkData(1, 1, "VARCHAR") + tdSql.checkData(1, 2, 16) + else: + tdSql.checkData(1, 1, "NCHAR") + tdSql.checkData(1, 2, 8) + tdSql.query("describe db.stb9") - tdSql.checkData(1, 1, "NCHAR") - tdSql.checkData(1, 2, 16) + if major_ver == "3": + tdSql.checkData(1, 1, "VARCHAR") + tdSql.checkData(1, 2, 16) + else: + tdSql.checkData(1, 1, "NCHAR") + tdSql.checkData(1, 2, 8) + tdSql.query("select count(*) from db.stb1") - tdSql.checkData(0, 0, 80) + tdSql.checkData(0, 0, 160) tdSql.query("select count(*) from db.stb2") - tdSql.checkData(0, 0, 80) + tdSql.checkData(0, 0, 160) tdSql.query("select count(*) from db.stb3") - tdSql.checkData(0, 0, 80) + tdSql.checkData(0, 0, 160) tdSql.query("select count(*) from db.stb4") - tdSql.checkData(0, 0, 80) + tdSql.checkData(0, 0, 160) tdSql.query("select count(*) from db.stb5") - tdSql.checkData(0, 0, 80) + tdSql.checkData(0, 0, 160) tdSql.query("select count(*) from db.stb6") - tdSql.checkData(0, 0, 80) + tdSql.checkData(0, 0, 160) tdSql.query("select count(*) from db.stb7") - tdSql.checkData(0, 0, 80) + tdSql.checkData(0, 0, 160) tdSql.query("select count(*) from db.stb8") - tdSql.checkData(0, 0, 80) + tdSql.checkData(0, 0, 160) tdSql.query("select count(*) from db.stb9") - tdSql.checkData(0, 0, 80) + tdSql.checkData(0, 0, 160) def stop(self): tdSql.close() diff --git a/tests/docs-examples-test/python.sh b/tests/docs-examples-test/python.sh index 140d05395bdf6c32bbded25bb53ffaab523e3434..ccb391b7527fbf2490911d868d08c87436221162 100644 --- a/tests/docs-examples-test/python.sh +++ b/tests/docs-examples-test/python.sh @@ -23,7 +23,7 @@ python3 bind_param_example.py # 4 taos -s "drop database power" -python3 multi_bind_example.py +python3 multi_bind_example.py # 5 python3 query_example.py @@ -44,4 +44,43 @@ taos -s "drop database test" python3 json_protocol_example.py # 10 -# python3 subscribe_demo.py +pip install SQLAlchemy +pip install pandas +taosBenchmark -y -d power -t 10 -n 10 +python3 conn_native_pandas.py +python3 conn_rest_pandas.py +taos -s "drop database if exists power" + +# 11 +taos -s "create database if not exists test" +python3 connect_native_reference.py + +# 12 +python3 connect_rest_examples.py + +# 13 +python3 handle_exception.py + +# 14 +taosBenchmark -y -d power -t 2 -n 10 +python3 rest_client_example.py +taos -s "drop database if exists power" + +# 15 +python3 result_set_examples.py + +# 16 +python3 tmq_example.py + +# 17 +python3 sql_writer.py + +# 18 +python3 mockdatasource.py + +# 19 +python3 fast_write_example.py + +# 20 +pip3 install kafka-python +python3 kafka_example.py diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index c28d0745c4cafa192aa39b0ae96668c4e323f652..94d87c705cfaa07028ed64caf30ae668ad245cf3 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -1060,6 +1060,7 @@ #develop test ,,n,develop-test,python3 ./test.py -f 2-query/table_count_scan.py +,,n,develop-test,python3 ./test.py -f 2-query/show_create_db.py ,,n,develop-test,python3 ./test.py -f 5-taos-tools/taosbenchmark/auto_create_table_json.py ,,n,develop-test,python3 ./test.py -f 5-taos-tools/taosbenchmark/custom_col_tag.py ,,n,develop-test,python3 ./test.py -f 5-taos-tools/taosbenchmark/default_json.py diff --git a/tests/parallel_test/container_build.sh b/tests/parallel_test/container_build.sh index 221e5490560252989b81264247ca2b8bf6eda7b1..ff854449bb913bea8bf5dd1b6477d7d9c7a7b70e 100755 --- a/tests/parallel_test/container_build.sh +++ b/tests/parallel_test/container_build.sh @@ -55,7 +55,7 @@ fi date docker run \ -v $REP_MOUNT_PARAM \ - --rm --ulimit core=-1 taos_test:v1.0 sh -c "cd $REP_DIR;rm -rf debug;mkdir -p debug;cd debug;cmake .. -DBUILD_HTTP=false -DBUILD_TOOLS=true -DBUILD_TEST=true -DWEBSOCKET=true;make -j || exit 1" + --rm --ulimit core=-1 taos_test:v1.0 sh -c "cd $REP_DIR;rm -rf debug;mkdir -p debug;cd debug;cmake .. -DBUILD_HTTP=false -DBUILD_TOOLS=true -DBUILD_TEST=true -DWEBSOCKET=true -DBUILD_TAOSX=true;make -j || exit 1" if [[ -d ${WORKDIR}/debugNoSan ]] ;then echo "delete ${WORKDIR}/debugNoSan" @@ -70,7 +70,7 @@ mv ${REP_REAL_PATH}/debug ${WORKDIR}/debugNoSan date docker run \ -v $REP_MOUNT_PARAM \ - --rm --ulimit core=-1 taos_test:v1.0 sh -c "cd $REP_DIR;rm -rf debug;mkdir -p debug;cd debug;cmake .. -DBUILD_HTTP=false -DBUILD_TOOLS=true -DBUILD_TEST=true -DWEBSOCKET=true -DBUILD_SANITIZER=1 -DTOOLS_SANITIZE=true -DTOOLS_BUILD_TYPE=Debug;make -j || exit 1 " + --rm --ulimit core=-1 taos_test:v1.0 sh -c "cd $REP_DIR;rm -rf debug;mkdir -p debug;cd debug;cmake .. -DBUILD_HTTP=false -DBUILD_TOOLS=true -DBUILD_TEST=true -DWEBSOCKET=true -DBUILD_SANITIZER=1 -DTOOLS_SANITIZE=true -DTOOLS_BUILD_TYPE=Debug -DBUILD_TAOSX=true;make -j || exit 1 " mv ${REP_REAL_PATH}/debug ${WORKDIR}/debugSan diff --git a/tests/script/api/batchprepare.c b/tests/script/api/batchprepare.c index 88dada44accf588357314843d2a7d09964c896c0..0903095dc98dcf3652546859003fa65fd88e1569 100644 --- a/tests/script/api/batchprepare.c +++ b/tests/script/api/batchprepare.c @@ -2828,7 +2828,7 @@ void runAll(TAOS *taos) { printf("%s Begin\n", gCaseCtrl.caseCatalog); runCaseList(taos); -#if 0 +#if 1 strcpy(gCaseCtrl.caseCatalog, "Micro DB precision Test"); printf("%s Begin\n", gCaseCtrl.caseCatalog); gCaseCtrl.precision = TIME_PRECISION_MICRO; diff --git a/tests/script/tsim/db/alter_replica_13.sim b/tests/script/tsim/db/alter_replica_13.sim index d75acb50ad087383fd2e5aabadfb0e2c1165204e..a9dc1741a1681a25da42c842e5c82078bad40ae7 100644 --- a/tests/script/tsim/db/alter_replica_13.sim +++ b/tests/script/tsim/db/alter_replica_13.sim @@ -79,6 +79,7 @@ sql insert into db.ctb6 values(now, 6, "6") sql insert into db.ctb7 values(now, 7, "7") sql insert into db.ctb8 values(now, 8, "8") sql insert into db.ctb9 values(now, 9, "9") +sql flush database db; print =============== step3: create dnodes sql create dnode $hostname port 7300 diff --git a/tests/script/tsim/query/sys_tbname.sim b/tests/script/tsim/query/sys_tbname.sim index 9b16d982026b1648da938d9f74b4c0ee52f989c4..7b3953129a81d6fa56b093c8b048b2071b6fbe70 100644 --- a/tests/script/tsim/query/sys_tbname.sim +++ b/tests/script/tsim/query/sys_tbname.sim @@ -86,4 +86,23 @@ if $data00 != @ins_tags@ then return -1 endi +sql create stable stb(ts timestamp, f int) tags(t1 int, t2 int, t3 int, t4 int, t5 int); + +$i = 0 +$tbNum = 1000 +$tbPrefix = stb_tb +while $i < $tbNum + $tb = $tbPrefix . $i + sql create table $tb using stb tags( $i , $i , $i , $i , $i ) + + $i = $i + 1 +endw + +sql select tag_value from information_schema.ins_tags where stable_name='stb'; +if $rows != 5000 then + print $rows + return -1 +endi + + #system sh/exec.sh -n dnode1 -s stop -x SIGINT diff --git a/tests/script/tsim/sma/rsmaCreateInsertQuery.sim b/tests/script/tsim/sma/rsmaCreateInsertQuery.sim index 508e6f88c14f09d4e55c8bd3d40ea78426e5fa43..b3144e4e0dd217319a0d58bf3222360fcd5fa355 100644 --- a/tests/script/tsim/sma/rsmaCreateInsertQuery.sim +++ b/tests/script/tsim/sma/rsmaCreateInsertQuery.sim @@ -81,8 +81,60 @@ if $data01 != 10 then endi #=================================================================== +system sh/exec.sh -n dnode1 -s stop -x SIGINT +system sh/exec.sh -n dnode1 -s start +print =============== wait maxdelay 15+2 seconds for results after reboot +sleep 17000 + +print =============== select * from retention level 2 from memory after reboot +sql select * from ct1; +print $data00 $data01 +if $rows > 2 then + print retention level 2 file rows $rows > 2 + return -1 +endi + + +if $data01 != 1 then + if $data01 != 10 then + print =============> $data01 + print retention level 2 file result $data01 != 1 or 10 + return -1 + endi +endi + +print =============== select * from retention level 1 from memory after reboot +sql select * from ct1 where ts > now-8d; +print $data00 $data01 +if $rows > 2 then + print retention level 1 file rows $rows > 2 + return -1 +endi + +if $data01 != 1 then + if $data01 != 10 then + print retention level 1 file result $data01 != 1 or 10 + return -1 + endi +endi + +print =============== select * from retention level 0 from memory after reboot +sql select * from ct1 where ts > now-3d; +print $data00 $data01 +print $data10 $data11 +print $data20 $data21 + +if $rows < 1 then + print retention level 0 file rows $rows < 1 + return -1 +endi + +if $data01 != 10 then + print retention level 0 file result $data01 != 10 + return -1 +endi -#==================== reboot to trigger commit data to file +#==================== flush database to trigger commit data to file sql flush database d0; system sh/exec.sh -n dnode1 -s stop -x SIGINT system sh/exec.sh -n dnode1 -s start diff --git a/tests/script/tsim/sma/rsmaPersistenceRecovery.sim b/tests/script/tsim/sma/rsmaPersistenceRecovery.sim index 4117a2403dcc7156a8b9ea3adb840fe25b26b376..0c9d23335ea3b378a2dbe849dc8f7fd773470dea 100644 --- a/tests/script/tsim/sma/rsmaPersistenceRecovery.sim +++ b/tests/script/tsim/sma/rsmaPersistenceRecovery.sim @@ -82,9 +82,62 @@ if $data01 != 10 then endi #=================================================================== +system sh/exec.sh -n dnode1 -s stop -x SIGINT +system sh/exec.sh -n dnode1 -s start +print =============== wait maxdelay 5+2 seconds for results after reboot +sleep 7000 + +print =============== select * from retention level 2 from memory after reboot +sql select * from ct1; +print $data00 $data01 $data02 +print $data10 $data11 $data12 +if $rows > 2 then + print retention level 2 file rows $rows > 2 + return -1 +endi + + +if $data01 != 100 then + if $data01 != 10 then + print retention level 2 file result $data01 != 100 or 10 + return -1 + endi +endi + +print =============== select * from retention level 1 from memory after reboot +sql select * from ct1 where ts > now-8d; +print $data00 $data01 $data02 +print $data10 $data11 $data12 +if $rows > 2 then + print retention level 1 file rows $rows > 2 + return -1 +endi + +if $data01 != 100 then + if $data01 != 10 then + print retention level 1 file result $data01 != 100 or 10 + return -1 + endi +endi + +print =============== select * from retention level 0 from memory after reboot +sql select * from ct1 where ts > now-3d; +print $data00 $data01 $data02 +print $data10 $data11 $data12 +print $data20 $data21 $data22 + +if $rows < 1 then + print retention level 0 file rows $rows < 1 + return -1 +endi + +if $data01 != 10 then + print retention level 0 file result $data01 != 10 + return -1 +endi -#==================== reboot to trigger commit data to file +#==================== flush database to trigger commit data to file sql flush database d0; system sh/exec.sh -n dnode1 -s stop -x SIGINT system sh/exec.sh -n dnode1 -s start diff --git a/tests/script/tsim/sma/tsmaCreateInsertQuery.sim b/tests/script/tsim/sma/tsmaCreateInsertQuery.sim index 27f4a475d273bc6f500fcc4075892cc61ebe3635..9b6fc96bc00fde8418bf1d29d4c8cd8fc7f02091 100644 --- a/tests/script/tsim/sma/tsmaCreateInsertQuery.sim +++ b/tests/script/tsim/sma/tsmaCreateInsertQuery.sim @@ -136,7 +136,76 @@ system sh/exec.sh -n dnode1 -s stop -x SIGINT system sh/exec.sh -n dnode1 -s start sleep 50 +print =============== select * from ct1 from memory after reboot +sql select * from ct1; +print $data00 $data01 $data02 $data03 +print $data10 $data11 $data12 $data13 +print $data20 $data21 $data22 $data23 +print $data30 $data31 $data32 $data33 +print $data40 $data41 $data42 $data43 +if $rows != 5 then + print rows $rows != 5 + return -1 +endi + +print =============== select * from stb from memory in designated vgroup after reboot +sql select _wstart, _wend, min(c1),max(c2),max(c1) from stb interval(5m,10s) sliding(5m); +print $data00 $data01 $data02 $data03 $data04 +print $data10 $data11 $data12 $data13 $data14 +if $rows != 1 then + print rows $rows != 1 + return -1 +endi + +if $data02 != -13 then + print data02 $data02 != -13 + return -1 +endi + +if $data03 != 20.00000 then + print data03 $data03 != 20.00000 + return -1 +endi + +if $data04 != 20 then + print data04 $data04 != 20 + return -1 +endi + +print =============== select * from stb from memory in common vgroups after reboot +sql select _wstart, _wend, min(c1),max(c2),max(c1),max(c3) from stb interval(5m,10s) sliding(5m); +print $data00 $data01 $data02 $data03 $data04 $data05 +if $rows != 1 then + print rows $rows != 1 + return -1 +endi + +if $data02 != -13 then + print data02 $data02 != -13 + return -1 +endi + +if $data03 != 20.00000 then + print data03 $data03 != 20.00000 + return -1 +endi + +if $data04 != 20 then + print data04 $data04 != 20 + return -1 +endi + +if $data05 != 30.000000000 then + print data05 $data05 != 30.000000000 + return -1 +endi + +#==================== flush database to trigger commit data to file +sql flush database d1; +system sh/exec.sh -n dnode1 -s stop -x SIGINT +system sh/exec.sh -n dnode1 -s start +sleep 50 print =============== select * from ct1 from file sql select * from ct1; print $data00 $data01 $data02 $data03 diff --git a/tests/system-test/7-tmq/tmqUpdate-1ctb.py b/tests/system-test/7-tmq/tmqUpdate-1ctb.py index b974e4a41a2c60d4c882b2006400f500ec799efc..db2ec3285dd87de532a29021142e4285c05718db 100644 --- a/tests/system-test/7-tmq/tmqUpdate-1ctb.py +++ b/tests/system-test/7-tmq/tmqUpdate-1ctb.py @@ -206,7 +206,7 @@ class TDTestCase: paraDict['rowsPerTbl'] = self.rowsPerTbl consumerId = 1 if self.snapshot == 0: - expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (2)) + expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (1/2)) elif self.snapshot == 1: expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (1)) diff --git a/tests/system-test/7-tmq/tmqUpdate-multiCtb-snapshot0.py b/tests/system-test/7-tmq/tmqUpdate-multiCtb-snapshot0.py index d5df88cf43b1e207f3856807bb9b0bcf55b4b8c6..daffff44c1cf0dda7c4ecf5ac2dfd3dadfdf5504 100644 --- a/tests/system-test/7-tmq/tmqUpdate-multiCtb-snapshot0.py +++ b/tests/system-test/7-tmq/tmqUpdate-multiCtb-snapshot0.py @@ -213,9 +213,9 @@ class TDTestCase: paraDict['rowsPerTbl'] = self.rowsPerTbl consumerId = 1 if self.snapshot == 0: - expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (2 + 1/2*1/2*2 + 1/2*1/2)) + expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (1/2) * (1/2*3)) elif self.snapshot == 1: - expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (2 + 1/2*1/2)) + expectrowcnt = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (1 + 1/2)) topicList = topicFromStb1 ifcheckdata = 1 diff --git a/tools/shell/inc/shellInt.h b/tools/shell/inc/shellInt.h index 6b3bc56dc767712b3607075aa07fafb85530d39b..1fe09f586356554d017bfbb572dab84ab16baf52 100644 --- a/tools/shell/inc/shellInt.h +++ b/tools/shell/inc/shellInt.h @@ -148,5 +148,6 @@ void shellRunSingleCommandWebsocketImp(char *command); // shellMain.c extern SShellObj shell; +extern void tscWriteCrashInfo(int signum, void *sigInfo, void *context); #endif /*_TD_SHELL_INT_H_*/ diff --git a/tools/shell/src/shellEngine.c b/tools/shell/src/shellEngine.c index 4a9cf8343120654899a9080e4f6e219685c88cdf..1f79cfcc0455d5aa911b154bce9817e264da9d03 100644 --- a/tools/shell/src/shellEngine.c +++ b/tools/shell/src/shellEngine.c @@ -1137,10 +1137,8 @@ int32_t shellExecute() { taosSetSignal(SIGTERM, shellQueryInterruptHandler); taosSetSignal(SIGHUP, shellQueryInterruptHandler); - taosSetSignal(SIGABRT, shellQueryInterruptHandler); - taosSetSignal(SIGINT, shellQueryInterruptHandler); - + #ifdef WEBSOCKET if (!shell.args.restful && !shell.args.cloud) { #endif diff --git a/tools/shell/src/shellMain.c b/tools/shell/src/shellMain.c index fa3c0f2585ad6d6ef14c3dda35fb64024a521a02..22b8e89959593e9da18124908fedac31bd6b45ed 100644 --- a/tools/shell/src/shellMain.c +++ b/tools/shell/src/shellMain.c @@ -19,6 +19,29 @@ SShellObj shell = {0}; + +void shellCrashHandler(int signum, void *sigInfo, void *context) { + taosIgnSignal(SIGTERM); + taosIgnSignal(SIGHUP); + taosIgnSignal(SIGINT); + taosIgnSignal(SIGBREAK); + +#if !defined(WINDOWS) + taosIgnSignal(SIGBUS); +#endif + taosIgnSignal(SIGABRT); + taosIgnSignal(SIGFPE); + taosIgnSignal(SIGSEGV); + + tscWriteCrashInfo(signum, sigInfo, context); + +#ifdef _TD_DARWIN_64 + exit(signum); +#elif defined(WINDOWS) + exit(signum); +#endif +} + int main(int argc, char *argv[]) { shell.exit = false; #ifdef WEBSOCKET @@ -26,6 +49,13 @@ int main(int argc, char *argv[]) { shell.args.cloud = true; #endif +#if !defined(WINDOWS) + taosSetSignal(SIGBUS, shellCrashHandler); +#endif + taosSetSignal(SIGABRT, shellCrashHandler); + taosSetSignal(SIGFPE, shellCrashHandler); + taosSetSignal(SIGSEGV, shellCrashHandler); + if (shellCheckIntSize() != 0) { return -1; }