Merge remote-tracking branch 'origin/3.0' into enh/TD-18702

2a8d791e · Shengliang Guan · 9f2078ae · 9a4b5a16 · 2a8d791e · 2a8d791e
48 changed files
--- a/cmake/taosadapter_CMakeLists.txt.in
+++ b/cmake/taosadapter_CMakeLists.txt.in
@@ -2,7 +2,7 @@
 # taosadapter
 ExternalProject_Add(taosadapter
        GIT_REPOSITORY https://github.com/taosdata/taosadapter.git
-        GIT_TAG 213f8b3
+        GIT_TAG 3e08996
        SOURCE_DIR "${TD_SOURCE_DIR}/tools/taosadapter"
        BINARY_DIR ""
        #BUILD_IN_SOURCE TRUE

--- a/docs/en/12-taos-sql/02-database.md
+++ b/docs/en/12-taos-sql/02-database.md
@@ -58,7 +58,7 @@ database_option: {
 - WAL_FSYNC_PERIOD: specifies the interval (in milliseconds) at which data is written from the WAL to disk. This parameter takes effect only when the WAL parameter is set to 2. The default value is 3000. Enter a value between 0 and 180000. The value 0 indicates that incoming data is immediately written to disk.
 - MAXROWS: specifies the maximum number of rows recorded in a block. The default value is 4096.
 - MINROWS: specifies the minimum number of rows recorded in a block. The default value is 100.
- KEEP: specifies the time for which data is retained. Enter a value between 1 and 365000. The default value is 3650. The value of the KEEP parameter must be greater than or equal to the value of the DURATION parameter. TDengine automatically deletes data that is older than the value of the KEEP parameter. You can use m (minutes), h (hours), and d (days) as the unit, for example KEEP 100h or KEEP 10d. If you do not include a unit, d is used by default.
+- KEEP: specifies the time for which data is retained. Enter a value between 1 and 365000. The default value is 3650. The value of the KEEP parameter must be greater than or equal to the value of the DURATION parameter. TDengine automatically deletes data that is older than the value of the KEEP parameter. You can use m (minutes), h (hours), and d (days) as the unit, for example KEEP 100h or KEEP 10d. If you do not include a unit, d is used by default. The Enterprise Edition supports [Tiered Storage](https://docs.tdengine.com/tdinternal/arch/#tiered-storage), so you can specify up to three comma-separated KEEP values that satisfy keep 0 <= keep 1 <= keep 2, for example KEEP 100h,100d,3650d. The Community Edition does not support Tiered Storage: if multiple KEEP values are configured, they do not take effect individually, and only the largest value is used as KEEP.
 - PAGES: specifies the number of pages in the metadata storage engine cache on each vnode. Enter a value greater than or equal to 64. The default value is 256. The space occupied by metadata storage on each vnode is equal to the product of the values of the PAGESIZE and PAGES parameters. The space occupied by default is 1 MB.
 - PAGESIZE: specifies the size (in KB) of each page in the metadata storage engine cache on each vnode. The default value is 4. Enter a value between 1 and 16384.
 - PRECISION: specifies the precision at which a database records timestamps. Enter ms for milliseconds, us for microseconds, or ns for nanoseconds. The default value is ms.

--- a/docs/en/14-reference/12-config/index.md
+++ b/docs/en/14-reference/12-config/index.md
@@ -323,6 +323,7 @@ The charset that takes effect is UTF-8.
 | Applicable    | Server Only                                 |
 | Meaning       | All data files are stored in this directory |
 | Default Value | /var/lib/taos                               |
+| Note          | [Tiered Storage](https://docs.tdengine.com/tdinternal/arch/#tiered-storage) must be used together with the [KEEP](https://docs.tdengine.com/taos-sql/database/#parameters) parameter |

 ### tempDir


--- a/docs/examples/go/go.sum
+++ b/docs/examples/go/go.sum
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
+github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
+github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/taosdata/driver-go/v3 v3.1.0/go.mod h1:H2vo/At+rOPY1aMzUV9P49SVX7NlXb3LAbKw+MCLrmU=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
--- a/docs/examples/python/conn_native_pandas.py
+++ b/docs/examples/python/conn_native_pandas.py
 import pandas
-from sqlalchemy import create_engine
+from sqlalchemy import create_engine, text

 engine = create_engine("taos://root:taosdata@localhost:6030/power")
-df = pandas.read_sql("SELECT * FROM meters", engine)
+conn = engine.connect()
+df = pandas.read_sql(text("SELECT * FROM power.meters"), conn)
+conn.close()
+

 # print index
 print(df.index)

--- a/docs/examples/python/conn_rest_pandas.py
+++ b/docs/examples/python/conn_rest_pandas.py
 import pandas
-from sqlalchemy import create_engine
+from sqlalchemy import create_engine, text

 engine = create_engine("taosrest://root:taosdata@localhost:6041")
-df: pandas.DataFrame = pandas.read_sql("SELECT * FROM power.meters", engine)
+conn = engine.connect()
+df: pandas.DataFrame = pandas.read_sql(text("SELECT * FROM power.meters"), conn)
+conn.close()

 # print index
 print(df.index)

--- a/docs/examples/python/connect_rest_examples.py
+++ b/docs/examples/python/connect_rest_examples.py
 # ANCHOR: connect
 from taosrest import connect, TaosRestConnection, TaosRestCursor

-conn: TaosRestConnection = connect(url="http://localhost:6041",
-                                   user="root",
-                                   password="taosdata",
-                                   timeout=30)
+conn = connect(url="http://localhost:6041",
+               user="root",
+               password="taosdata",
+               timeout=30)

 # ANCHOR_END: connect
 # ANCHOR: basic
 # create STable
-cursor: TaosRestCursor = conn.cursor()
+cursor = conn.cursor()
 cursor.execute("DROP DATABASE IF EXISTS power")
 cursor.execute("CREATE DATABASE power")
-cursor.execute("CREATE STABLE power.meters (ts TIMESTAMP, current FLOAT, voltage INT, phase FLOAT) TAGS (location BINARY(64), groupId INT)")
+cursor.execute(
+    "CREATE STABLE power.meters (ts TIMESTAMP, current FLOAT, voltage INT, phase FLOAT) TAGS (location BINARY(64), groupId INT)")

 # insert data
 cursor.execute("""INSERT INTO power.d1001 USING power.meters TAGS('California.SanFrancisco', 2) VALUES ('2018-10-03 14:38:05.000', 10.30000, 219, 0.31000) ('2018-10-03 14:38:15.000', 12.60000, 218, 0.33000) ('2018-10-03 14:38:16.800', 12.30000, 221, 0.31000)
@@ -28,7 +29,7 @@ print("queried row count:", cursor.rowcount)
 # get column names from cursor
 column_names = [meta[0] for meta in cursor.description]
 # get rows
-data: list[tuple] = cursor.fetchall()
+data = cursor.fetchall()
 print(column_names)
 for row in data:
    print(row)

--- a/docs/examples/python/connection_usage_native_reference.py
+++ b/docs/examples/python/connection_usage_native_reference.py
@@ -8,7 +8,7 @@ conn.execute("CREATE DATABASE test")
 # change database. same as execute "USE db"
 conn.select_db("test")
 conn.execute("CREATE STABLE weather(ts TIMESTAMP, temperature FLOAT) TAGS (location INT)")
-affected_row: int = conn.execute("INSERT INTO t1 USING weather TAGS(1) VALUES (now, 23.5) (now+1m, 23.5) (now+2m, 24.4)")
+affected_row = conn.execute("INSERT INTO t1 USING weather TAGS(1) VALUES (now, 23.5) (now+1m, 23.5) (now+2m, 24.4)")
 print("affected_row", affected_row)
 # output:
 # affected_row 3
@@ -16,10 +16,10 @@ print("affected_row", affected_row)

 # ANCHOR: query
 # Execute a sql and get its result set. It's useful for SELECT statement
-result: taos.TaosResult = conn.query("SELECT * from weather")
+result = conn.query("SELECT * from weather")

 # Get fields from result
-fields: taos.field.TaosFields = result.fields
+fields = result.fields
 for field in fields:
    print(field)  # {name: ts, type: 9, bytes: 8}


--- a/docs/examples/python/fast_write_example.py
+++ b/docs/examples/python/fast_write_example.py
 # install dependencies:
 # recommend python >= 3.8
-# pip3 install faster-fifo
 #

 import logging
 import math
+import multiprocessing
 import sys
 import time
 import os
-from multiprocessing import Process
-from faster_fifo import Queue
+from multiprocessing import Process, Queue
 from mockdatasource import MockDataSource
 from queue import Empty
 from typing import List
@@ -22,8 +21,7 @@ TABLE_COUNT = 1000
 QUEUE_SIZE = 1000000
 MAX_BATCH_SIZE = 3000

-read_processes = []
-write_processes = []
+_DONE_MESSAGE = '__DONE__'


 def get_connection():
@@ -44,41 +42,64 @@ def get_connection():

 # ANCHOR: read

-def run_read_task(task_id: int, task_queues: List[Queue]):
+def run_read_task(task_id: int, task_queues: List[Queue], infinity):
    table_count_per_task = TABLE_COUNT // READ_TASK_COUNT
-    data_source = MockDataSource(f"tb{task_id}", table_count_per_task)
+    data_source = MockDataSource(f"tb{task_id}", table_count_per_task, infinity)
    try:
        for batch in data_source:
+            if isinstance(batch, tuple):
+                batch = [batch]
            for table_id, rows in batch:
                # hash data to different queue
                i = table_id % len(task_queues)
                # block putting forever when the queue is full
-                task_queues[i].put_many(rows, block=True, timeout=-1)
+                for row in rows:
+                    task_queues[i].put(row)
+        if not infinity:
+            for queue in task_queues:
+                queue.put(_DONE_MESSAGE)
    except KeyboardInterrupt:
        pass
+    finally:
+        logging.info('read task over')


 # ANCHOR_END: read

+
 # ANCHOR: write
-def run_write_task(task_id: int, queue: Queue):
+def run_write_task(task_id: int, queue: Queue, done_queue: Queue):
    from sql_writer import SQLWriter
    log = logging.getLogger(f"WriteTask-{task_id}")
    writer = SQLWriter(get_connection)
    lines = None
    try:
        while True:
-            try:
-                # get as many as possible
-                lines = queue.get_many(block=False, max_messages_to_get=MAX_BATCH_SIZE)
+            over = False
+            lines = []
+            for _ in range(MAX_BATCH_SIZE):
+                try:
+                    line = queue.get_nowait()
+                    if line == _DONE_MESSAGE:
+                        over = True
+                        break
+                    if line:
+                        lines.append(line)
+                except Empty:
+                    time.sleep(0.1)
+            if len(lines) > 0:
                writer.process_lines(lines)
-            except Empty:
-                time.sleep(0.01)
+            if over:
+                done_queue.put(_DONE_MESSAGE)
+                break
    except KeyboardInterrupt:
        pass
    except BaseException as e:
        log.debug(f"lines={lines}")
        raise e
+    finally:
+        writer.close()
+        log.debug('write task over')


 # ANCHOR_END: write
@@ -103,47 +124,64 @@ def set_global_config():


 # ANCHOR: monitor
-def run_monitor_process():
+def run_monitor_process(done_queue: Queue):
    log = logging.getLogger("DataBaseMonitor")
-    conn = get_connection()
-    conn.execute("DROP DATABASE IF EXISTS test")
-    conn.execute("CREATE DATABASE test")
-    conn.execute("CREATE STABLE test.meters (ts TIMESTAMP, current FLOAT, voltage INT, phase FLOAT) "
-                 "TAGS (location BINARY(64), groupId INT)")
+    conn = None
+    try:
+        conn = get_connection()

-    def get_count():
-        res = conn.query("SELECT count(*) FROM test.meters")
-        rows = res.fetch_all()
-        return rows[0][0] if rows else 0
+        def get_count():
+            res = conn.query("SELECT count(*) FROM test.meters")
+            rows = res.fetch_all()
+            return rows[0][0] if rows else 0

-    last_count = 0
-    while True:
-        time.sleep(10)
-        count = get_count()
-        log.info(f"count={count} speed={(count - last_count) / 10}")
-        last_count = count
+        last_count = 0
+        while True:
+            try:
+                done = done_queue.get_nowait()
+                if done == _DONE_MESSAGE:
+                    break
+            except Empty:
+                pass
+            time.sleep(10)
+            count = get_count()
+            log.info(f"count={count} speed={(count - last_count) / 10}")
+            last_count = count
+    finally:
+        conn.close()


 # ANCHOR_END: monitor
 # ANCHOR: main
-def main():
+def main(infinity):
    set_global_config()
    logging.info(f"READ_TASK_COUNT={READ_TASK_COUNT}, WRITE_TASK_COUNT={WRITE_TASK_COUNT}, "
                 f"TABLE_COUNT={TABLE_COUNT}, QUEUE_SIZE={QUEUE_SIZE}, MAX_BATCH_SIZE={MAX_BATCH_SIZE}")

-    monitor_process = Process(target=run_monitor_process)
+    conn = get_connection()
+    conn.execute("DROP DATABASE IF EXISTS test")
+    conn.execute("CREATE DATABASE IF NOT EXISTS test")
+    conn.execute("CREATE STABLE IF NOT EXISTS test.meters (ts TIMESTAMP, current FLOAT, voltage INT, phase FLOAT) "
+                 "TAGS (location BINARY(64), groupId INT)")
+    conn.close()
+
+    done_queue = Queue()
+    monitor_process = Process(target=run_monitor_process, args=(done_queue,))
    monitor_process.start()
-    time.sleep(3)  # waiting for database ready.
+    logging.debug(f"monitor task started with pid {monitor_process.pid}")

    task_queues: List[Queue] = []
+    write_processes = []
+    read_processes = []
+
    # create task queues
    for i in range(WRITE_TASK_COUNT):
-        queue = Queue(max_size_bytes=QUEUE_SIZE)
+        queue = Queue()
        task_queues.append(queue)

    # create write processes
    for i in range(WRITE_TASK_COUNT):
-        p = Process(target=run_write_task, args=(i, task_queues[i]))
+        p = Process(target=run_write_task, args=(i, task_queues[i], done_queue))
        p.start()
        logging.debug(f"WriteTask-{i} started with pid {p.pid}")
        write_processes.append(p)
@@ -151,13 +189,19 @@ def main():
    # create read processes
    for i in range(READ_TASK_COUNT):
        queues = assign_queues(i, task_queues)
-        p = Process(target=run_read_task, args=(i, queues))
+        p = Process(target=run_read_task, args=(i, queues, infinity))
        p.start()
        logging.debug(f"ReadTask-{i} started with pid {p.pid}")
        read_processes.append(p)

    try:
        monitor_process.join()
+        for p in read_processes:
+            p.join()
+        for p in write_processes:
+            p.join()
+        time.sleep(1)
+        return
    except KeyboardInterrupt:
        monitor_process.terminate()
        [p.terminate() for p in read_processes]
@@ -176,5 +220,6 @@ def assign_queues(read_task_id, task_queues):


 if __name__ == '__main__':
-    main()
+    multiprocessing.set_start_method('spawn')
+    main(False)
 # ANCHOR_END: main
--- a/docs/examples/python/kafka_example.py
+++ b/docs/examples/python/kafka_example.py
@@ -26,7 +26,8 @@ class Consumer(object):
        'bath_consume': True,
        'batch_size': 1000,
        'async_model': True,
-        'workers': 10
+        'workers': 10,
+        'testing': False
    }

    LOCATIONS = ['California.SanFrancisco', 'California.LosAngles', 'California.SanDiego', 'California.SanJose',
@@ -46,11 +47,12 @@ class Consumer(object):
    def __init__(self, **configs):
        self.config: dict = self.DEFAULT_CONFIGS
        self.config.update(configs)
-        self.consumer = KafkaConsumer(
-            self.config.get('kafka_topic'),  # topic
-            bootstrap_servers=self.config.get('kafka_brokers'),
-            group_id=self.config.get('kafka_group_id'),
-        )
+        if not self.config.get('testing'):
+            self.consumer = KafkaConsumer(
+                self.config.get('kafka_topic'),  # topic
+                bootstrap_servers=self.config.get('kafka_brokers'),
+                group_id=self.config.get('kafka_group_id'),
+            )
        self.taos = taos.connect(
            host=self.config.get('taos_host'),
            user=self.config.get('taos_user'),
@@ -60,7 +62,7 @@ class Consumer(object):
        )
        if self.config.get('async_model'):
            self.pool = ThreadPoolExecutor(max_workers=self.config.get('workers'))
-            self.tasks: list[Future] = []
+            self.tasks = []
        # tags and table mapping # key: {location}_{groupId} value:
        self.tag_table_mapping = {}
        i = 0
@@ -115,14 +117,14 @@ class Consumer(object):
        if self.taos is not None:
            self.taos.close()

-    def _run(self, f: Callable[[ConsumerRecord], bool]):
+    def _run(self, f):
        for message in self.consumer:
            if self.config.get('async_model'):
                self.pool.submit(f(message))
            else:
                f(message)

-    def _run_batch(self, f: Callable[[list[list[ConsumerRecord]]], None]):
+    def _run_batch(self, f):
        while True:
            messages = self.consumer.poll(timeout_ms=500, max_records=self.config.get('batch_size'))
            if messages:
@@ -140,7 +142,7 @@ class Consumer(object):
        logging.info('## insert sql %s', sql)
        return self.taos.execute(sql=sql) == 1

-    def _to_taos_batch(self, messages: list[list[ConsumerRecord]]):
+    def _to_taos_batch(self, messages):
        sql = self._build_sql_batch(messages=messages)
        if len(sql) == 0:  # decode error, skip
            return
@@ -162,7 +164,7 @@ class Consumer(object):
        table_name = self._get_table_name(location=location, group_id=group_id)
        return self.INSERT_PART_SQL.format(table_name, ts, current, voltage, phase)

-    def _build_sql_batch(self, messages: list[list[ConsumerRecord]]) -> str:
+    def _build_sql_batch(self, messages) -> str:
        sql_list = []
        for partition_messages in messages:
            for message in partition_messages:
@@ -186,7 +188,54 @@ def _get_location_and_group(key: str) -> (str, int):
    return fields[0], fields[1]


+def test_to_taos(consumer: Consumer):
+    msg = {
+        'location': 'California.SanFrancisco',
+        'groupId': 1,
+        'ts': '2022-12-06 15:13:38.643',
+        'current': 3.41,
+        'voltage': 105,
+        'phase': 0.02027,
+    }
+    record = ConsumerRecord(checksum=None, headers=None, offset=1, key=None, value=json.dumps(msg), partition=1,
+                            topic='test', serialized_key_size=None, serialized_header_size=None,
+                            serialized_value_size=None, timestamp=time.time(), timestamp_type=None)
+    assert consumer._to_taos(message=record)
+
+
+def test_to_taos_batch(consumer: Consumer):
+    records = [
+        [
+            ConsumerRecord(checksum=None, headers=None, offset=1, key=None,
+                           value=json.dumps({'location': 'California.SanFrancisco',
+                                             'groupId': 1,
+                                             'ts': '2022-12-06 15:13:38.643',
+                                             'current': 3.41,
+                                             'voltage': 105,
+                                             'phase': 0.02027, }),
+                           partition=1, topic='test', serialized_key_size=None, serialized_header_size=None,
+                           serialized_value_size=None, timestamp=time.time(), timestamp_type=None),
+            ConsumerRecord(checksum=None, headers=None, offset=1, key=None,
+                           value=json.dumps({'location': 'California.LosAngles',
+                                             'groupId': 2,
+                                             'ts': '2022-12-06 15:13:39.643',
+                                             'current': 3.41,
+                                             'voltage': 102,
+                                             'phase': 0.02027, }),
+                           partition=1, topic='test', serialized_key_size=None, serialized_header_size=None,
+                           serialized_value_size=None, timestamp=time.time(), timestamp_type=None),
+        ]
+    ]
+
+    consumer._to_taos_batch(messages=records)
+
+
 if __name__ == '__main__':
-    consumer = Consumer(async_model=True)
+    consumer = Consumer(async_model=True, testing=True)
+    # init env
    consumer.init_env()
-    consumer.consume()
\ No newline at end of file
+    # consumer.consume()
+    # test build sql
+    # test build sql batch
+    test_to_taos(consumer)
+    test_to_taos_batch(consumer)
--- a/docs/examples/python/mockdatasource.py
+++ b/docs/examples/python/mockdatasource.py
@@ -10,13 +10,14 @@ class MockDataSource:
        "9.4,118,0.141,California.SanFrancisco,4"
    ]

-    def __init__(self, tb_name_prefix, table_count):
+    def __init__(self, tb_name_prefix, table_count, infinity=True):
        self.table_name_prefix = tb_name_prefix + "_"
        self.table_count = table_count
        self.max_rows = 10000000
        self.current_ts = round(time.time() * 1000) - self.max_rows * 100
        # [(tableId, tableName, values),]
        self.data = self._init_data()
+        self.infinity = infinity

    def _init_data(self):
        lines = self.samples * (self.table_count // 5 + 1)
@@ -28,14 +29,19 @@ class MockDataSource:

    def __iter__(self):
        self.row = 0
-        return self
+        if not self.infinity:
+            return iter(self._iter_data())
+        else:
+            return self

    def __next__(self):
        """
        next 1000 rows for each table.
        return: {tableId:[row,...]}
        """
-        # generate 1000 timestamps
+        return self._iter_data()
+
+    def _iter_data(self):
        ts = []
        for _ in range(1000):
            self.current_ts += 100
@@ -47,3 +53,9 @@ class MockDataSource:
            rows = [table_name + ',' + t + ',' + values for t in ts]
            result.append((table_id, rows))
        return result
+
+
+if __name__ == '__main__':
+    datasource = MockDataSource('t', 10, False)
+    for data in datasource:
+        print(data)
--- a/docs/examples/python/sql_writer.py
+++ b/docs/examples/python/sql_writer.py
@@ -10,6 +10,7 @@ class SQLWriter:
        self._tb_tags = {}
        self._conn = get_connection_func()
        self._max_sql_length = self.get_max_sql_length()
+        self._conn.execute("create database if not exists test")
        self._conn.execute("USE test")

    def get_max_sql_length(self):
@@ -20,7 +21,7 @@ class SQLWriter:
                return int(r[1])
        return 1024 * 1024

-    def process_lines(self, lines: str):
+    def process_lines(self, lines: [str]):
        """
        :param lines: [[tbName,ts,current,voltage,phase,location,groupId]]
        """
@@ -60,6 +61,7 @@ class SQLWriter:
            buf.append(q)
            sql_len += len(q)
        sql += " ".join(buf)
+        self.create_tables()
        self.execute_sql(sql)
        self._tb_values.clear()

@@ -88,3 +90,22 @@ class SQLWriter:
        except BaseException as e:
            self.log.error("Execute SQL: %s", sql)
            raise e
+
+    def close(self):
+        if self._conn:
+            self._conn.close()
+
+
+if __name__ == '__main__':
+    def get_connection_func():
+        conn = taos.connect()
+        return conn
+
+
+    writer = SQLWriter(get_connection_func=get_connection_func)
+    writer.execute_sql(
+        "create stable if not exists meters (ts timestamp, current float, voltage int, phase float) "
+        "tags (location binary(64), groupId int)")
+    writer.execute_sql(
+        "INSERT INTO d21001 USING meters TAGS ('California.SanFrancisco', 2) "
+        "VALUES ('2021-07-13 14:06:32.272', 10.2, 219, 0.32)")
--- a/docs/examples/python/tmq_example.py
+++ b/docs/examples/python/tmq_example.py
@@ -19,8 +19,14 @@ def init_tmq_env(db, topic):
    conn.execute("insert into tb3 values (now, 3, 3.0, 'tmq test')")


+def cleanup(db, topic):
+    conn = taos.connect()
+    conn.execute("drop topic if exists {}".format(topic))
+    conn.execute("drop database if exists {}".format(db))
+
+
 if __name__ == '__main__':
-    init_tmq_env("tmq_test", "tmq_test_topic") # init env
+    init_tmq_env("tmq_test", "tmq_test_topic")  # init env
    consumer = Consumer(
        {
            "group.id": "tg2",
@@ -33,9 +39,9 @@ if __name__ == '__main__':

    try:
        while True:
-            res = consumer.poll(100)
+            res = consumer.poll(1)
            if not res:
-                continue
+                break
            err = res.error()
            if err is not None:
                raise err
@@ -46,3 +52,4 @@ if __name__ == '__main__':
    finally:
        consumer.unsubscribe()
        consumer.close()
+        cleanup("tmq_test", "tmq_test_topic")
--- a/docs/zh/12-taos-sql/02-database.md
+++ b/docs/zh/12-taos-sql/02-database.md
@@ -58,7 +58,7 @@ database_option: {
 - WAL_FSYNC_PERIOD：当 WAL 参数设置为 2 时，落盘的周期。默认为 3000，单位毫秒。最小为 0，表示每次写入立即落盘；最大为 180000，即三分钟。
 - MAXROWS：文件块中记录的最大条数，默认为 4096 条。
 - MINROWS：文件块中记录的最小条数，默认为 100 条。
- KEEP：表示数据文件保存的天数，缺省值为 3650，取值范围 [1, 365000]，且必须大于或等于 DURATION 参数值。数据库会自动删除保存时间超过 KEEP 值的数据。KEEP 可以使用加单位的表示形式，如 KEEP 100h、KEEP 10d 等，支持 m（分钟）、h（小时）和 d（天）三个单位。也可以不写单位，如 KEEP 50，此时默认单位为天。
+- KEEP：表示数据文件保存的天数，缺省值为 3650，取值范围 [1, 365000]，且必须大于或等于 DURATION 参数值。数据库会自动删除保存时间超过 KEEP 值的数据。KEEP 可以使用加单位的表示形式，如 KEEP 100h、KEEP 10d 等，支持 m（分钟）、h（小时）和 d（天）三个单位。也可以不写单位，如 KEEP 50，此时默认单位为天。企业版支持[多级存储](https://docs.taosdata.com/tdinternal/arch/#%E5%A4%9A%E7%BA%A7%E5%AD%98%E5%82%A8)功能，因此可以设置多个保存时间（多个以英文逗号分隔，最多 3 个，满足 keep 0 <= keep 1 <= keep 2，如 KEEP 100h,100d,3650d）；社区版不支持多级存储功能（即使配置了多个保存时间，也不会生效，KEEP 会取最大的保存时间）。
 - PAGES：一个 VNODE 中元数据存储引擎的缓存页个数，默认为 256，最小 64。一个 VNODE 元数据存储占用 PAGESIZE \* PAGES，默认情况下为 1MB 内存。
 - PAGESIZE：一个 VNODE 中元数据存储引擎的页大小，单位为 KB，默认为 4 KB。范围为 1 到 16384，即 1 KB 到 16 MB。
 - PRECISION：数据库的时间戳精度。ms 表示毫秒，us 表示微秒，ns 表示纳秒，默认 ms 毫秒。

--- a/docs/zh/14-reference/12-config/index.md
+++ b/docs/zh/14-reference/12-config/index.md
@@ -323,6 +323,7 @@ charset 的有效值是 UTF-8。
 | 适用范围 | 仅服务端适用                               |
 | 含义     | 数据文件目录，所有的数据文件都将写入该目录 |
 | 缺省值   | /var/lib/taos                              |
+| 补充说明 | [多级存储](https://docs.taosdata.com/tdinternal/arch/#%E5%A4%9A%E7%BA%A7%E5%AD%98%E5%82%A8) 功能需要与 [KEEP](https://docs.taosdata.com/taos-sql/database/#%E5%8F%82%E6%95%B0%E8%AF%B4%E6%98%8E) 参数配合使用 |

 ### tempDir


--- a/include/common/tdataformat.h
+++ b/include/common/tdataformat.h
@@ -146,9 +146,9 @@ extern void (*tColDataCalcSMA[])(SColData *pColData, int64_t *sum, int64_t *max,
 int32_t tColDataAddValueByBind(SColData *pColData, TAOS_MULTI_BIND *pBind);
 void    tColDataSortMerge(SArray *colDataArr);

-//for raw block
-int32_t tColDataAddValueByDataBlock(SColData *pColData, int8_t type, int32_t bytes,
-                                    int32_t nRows, char* lengthOrbitmap, char *data);
+// for raw block
+int32_t tColDataAddValueByDataBlock(SColData *pColData, int8_t type, int32_t bytes, int32_t nRows, char *lengthOrbitmap,
+                                    char *data);
 // for encode/decode
 int32_t tPutColData(uint8_t *pBuf, SColData *pColData);
 int32_t tGetColData(uint8_t *pBuf, SColData *pColData);
@@ -261,7 +261,13 @@ struct STag {

 // STSchema ================================
 STSchema *tBuildTSchema(SSchema *aSchema, int32_t numOfCols, int32_t version);
-void      tDestroyTSchema(STSchema *pTSchema);
+#define tDestroyTSchema(pTSchema) \
+  do {                            \
+    if (pTSchema) {               \
+      taosMemoryFree(pTSchema);   \
+      pTSchema = NULL;            \
+    }                             \
+  } while (0)

 #endif


--- a/include/libs/wal/wal.h
+++ b/include/libs/wal/wal.h
@@ -201,6 +201,7 @@ int32_t walFetchHead(SWalReader *pRead, int64_t ver, SWalCkHead *pHead);
 int32_t walFetchBody(SWalReader *pRead, SWalCkHead **ppHead);
 int32_t walSkipFetchBody(SWalReader *pRead, const SWalCkHead *pHead);

+SWalRef *walRefFirstVer(SWal *, SWalRef *);
 SWalRef *walRefCommittedVer(SWal *);

 SWalRef *walOpenRef(SWal *);

--- a/packaging/tools/install.sh
+++ b/packaging/tools/install.sh
@@ -746,7 +746,7 @@ function is_version_compatible() {
 deb_erase() {
  confirm=""
  while [ "" == "${confirm}" ]; do
-    echo -e -n "${RED}Exist tdengine deb detected, do you want to remove it? [yes|no] ${NC}:"
+    echo -e -n "${RED}Existing TDengine deb is detected, do you want to remove it? [yes|no] ${NC}:"
    read confirm
    if [ "yes" == "$confirm" ]; then
      ${csudo}dpkg --remove tdengine ||:
@@ -760,7 +760,7 @@ deb_erase() {
 rpm_erase() {
  confirm=""
  while [ "" == "${confirm}" ]; do
-    echo -e -n "${RED}Exist tdengine rpm detected, do you want to remove it? [yes|no] ${NC}:"
+    echo -e -n "${RED}Existing TDengine rpm is detected, do you want to remove it? [yes|no] ${NC}:"
    read confirm
    if [ "yes" == "$confirm" ]; then
      ${csudo}rpm -e tdengine ||:

--- a/source/common/src/tdataformat.c
+++ b/source/common/src/tdataformat.c
@@ -1532,10 +1532,6 @@ STSchema *tBuildTSchema(SSchema *aSchema, int32_t numOfCols, int32_t version) {
  return pTSchema;
 }

-void tDestroyTSchema(STSchema *pTSchema) {
-  if (pTSchema) taosMemoryFree(pTSchema);
-}
-
 // SColData ========================================
 void tColDataDestroy(void *ph) {
  SColData *pColData = (SColData *)ph;

--- a/source/dnode/vnode/src/inc/tsdb.h
+++ b/source/dnode/vnode/src/inc/tsdb.h
@@ -206,6 +206,7 @@ int32_t tsdbCmprColData(SColData *pColData, int8_t cmprAlg, SBlockCol *pBlockCol
                        uint8_t **ppBuf);
 int32_t tsdbDecmprColData(uint8_t *pIn, SBlockCol *pBlockCol, int8_t cmprAlg, int32_t nVal, SColData *pColData,
                          uint8_t **ppBuf);
+int32_t tRowInfoCmprFn(const void *p1, const void *p2);
 // tsdbMemTable ==============================================================================================
 // SMemTable
 int32_t  tsdbMemTableCreate(STsdb *pTsdb, SMemTable **ppMemTable);

--- a/source/dnode/vnode/src/inc/vnodeInt.h
+++ b/source/dnode/vnode/src/inc/vnodeInt.h
@@ -252,7 +252,7 @@ int32_t tsdbSnapReaderClose(STsdbSnapReader** ppReader);
 int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData);
 // STsdbSnapWriter ========================================
 int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWriter** ppWriter);
-int32_t tsdbSnapWrite(STsdbSnapWriter* pWriter, uint8_t* pData, uint32_t nData);
+int32_t tsdbSnapWrite(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr);
 int32_t tsdbSnapWriterPrepareClose(STsdbSnapWriter* pWriter);
 int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback);
 // STqSnapshotReader ==

--- a/source/dnode/vnode/src/meta/metaQuery.c
+++ b/source/dnode/vnode/src/meta/metaQuery.c
@@ -706,9 +706,8 @@ int32_t metaGetTbTSchemaEx(SMeta *pMeta, tb_uid_t suid, tb_uid_t uid, int32_t sv
    }
  }

-  if (sver <= 0) {
-    metaError("meta/query: incorrect sver: %" PRId32 ".", sver);
-    code = TSDB_CODE_FAILED;
+  if (ASSERTS(sver > 0, __FILE__, __LINE__, "failed to get table schema version: %d", sver)) {
+    code = TSDB_CODE_NOT_FOUND;
    goto _exit;
  }


--- a/source/dnode/vnode/src/sma/smaSnapshot.c
+++ b/source/dnode/vnode/src/sma/smaSnapshot.c
@@ -446,10 +446,10 @@ int32_t rsmaSnapWrite(SRSmaSnapWriter* pWriter, uint8_t* pData, uint32_t nData)
  // rsma1/rsma2
  if (pHdr->type == SNAP_DATA_RSMA1) {
    pHdr->type = SNAP_DATA_TSDB;
-    code = tsdbSnapWrite(pWriter->pDataWriter[0], pData, nData);
+    code = tsdbSnapWrite(pWriter->pDataWriter[0], pHdr);
  } else if (pHdr->type == SNAP_DATA_RSMA2) {
    pHdr->type = SNAP_DATA_TSDB;
-    code = tsdbSnapWrite(pWriter->pDataWriter[1], pData, nData);
+    code = tsdbSnapWrite(pWriter->pDataWriter[1], pHdr);
  } else if (pHdr->type == SNAP_DATA_QTASK) {
    code = rsmaSnapWriteQTaskInfo(pWriter, pData, nData);
  } else {

--- a/source/dnode/vnode/src/tq/tq.c
+++ b/source/dnode/vnode/src/tq/tq.c
@@ -520,7 +520,12 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) {
            tqOffsetResetToData(&fetchOffsetNew, 0, 0);
          }
        } else {
-          tqOffsetResetToLog(&fetchOffsetNew, walGetFirstVer(pTq->pVnode->pWal));
+          pHandle->pRef = walRefFirstVer(pTq->pVnode->pWal, pHandle->pRef);
+          if (pHandle->pRef == NULL) {
+            terrno = TSDB_CODE_OUT_OF_MEMORY;
+            return -1;
+          }
+          tqOffsetResetToLog(&fetchOffsetNew, pHandle->pRef->refVer - 1);
        }
      } else if (reqOffset.type == TMQ_OFFSET__RESET_LATEST) {
        if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {

--- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c
+++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c
--- a/source/dnode/vnode/src/tsdb/tsdbUtil.c
+++ b/source/dnode/vnode/src/tsdb/tsdbUtil.c
@@ -758,7 +758,7 @@ int32_t tsdbRowMergerAdd(SRowMerger *pMerger, TSDBROW *pRow, STSchema *pTSchema)

            pTColVal->value.nData = pColVal->value.nData;
            if (pTColVal->value.nData) {
-              memcpy(pTColVal->value.pData,  pColVal->value.pData, pTColVal->value.nData);
+              memcpy(pTColVal->value.pData, pColVal->value.pData, pTColVal->value.nData);
            }
            pTColVal->flag = 0;
          } else {
@@ -1133,6 +1133,7 @@ _exit:
 void tBlockDataReset(SBlockData *pBlockData) {
  pBlockData->suid = 0;
  pBlockData->uid = 0;
+  pBlockData->nRow = 0;
 }

 void tBlockDataClear(SBlockData *pBlockData) {

--- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c
+++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c
@@ -455,7 +455,7 @@ int32_t vnodeSnapWrite(SVSnapWriter *pWriter, uint8_t *pData, uint32_t nData) {
        if (code) goto _err;
      }

-      code = tsdbSnapWrite(pWriter->pTsdbSnapWriter, pData, nData);
+      code = tsdbSnapWrite(pWriter->pTsdbSnapWriter, pHdr);
      if (code) goto _err;
    } break;
    case SNAP_DATA_TQ_HANDLE: {

--- a/source/libs/sync/src/syncAppendEntries.c
+++ b/source/libs/sync/src/syncAppendEntries.c
@@ -89,45 +89,6 @@
 //       /\ UNCHANGED <<candidateVars, leaderVars>>
 //

-int32_t syncNodeFollowerCommit(SSyncNode* ths, SyncIndex newCommitIndex) {
-  ASSERT(false && "deprecated");
-  if (ths->state != TAOS_SYNC_STATE_FOLLOWER) {
-    sNTrace(ths, "can not do follower commit");
-    return -1;
-  }
-
-  // maybe update commit index, leader notice me
-  if (newCommitIndex > ths->commitIndex) {
-    // has commit entry in local
-    if (newCommitIndex <= ths->pLogStore->syncLogLastIndex(ths->pLogStore)) {
-      // advance commit index to sanpshot first
-      SSnapshot snapshot;
-      ths->pFsm->FpGetSnapshotInfo(ths->pFsm, &snapshot);
-      if (snapshot.lastApplyIndex >= 0 && snapshot.lastApplyIndex > ths->commitIndex) {
-        SyncIndex commitBegin = ths->commitIndex;
-        SyncIndex commitEnd = snapshot.lastApplyIndex;
-        ths->commitIndex = snapshot.lastApplyIndex;
-        sNTrace(ths, "commit by snapshot from index:%" PRId64 " to index:%" PRId64, commitBegin, commitEnd);
-      }
-
-      SyncIndex beginIndex = ths->commitIndex + 1;
-      SyncIndex endIndex = newCommitIndex;
-
-      // update commit index
-      ths->commitIndex = newCommitIndex;
-
-      // call back Wal
-      int32_t code = ths->pLogStore->syncLogUpdateCommitIndex(ths->pLogStore, ths->commitIndex);
-      ASSERT(code == 0);
-
-      code = syncNodeDoCommit(ths, beginIndex, endIndex, ths->state);
-      ASSERT(code == 0);
-    }
-  }
-
-  return 0;
-}
-
 SSyncRaftEntry* syncBuildRaftEntryFromAppendEntries(const SyncAppendEntries* pMsg) {
  SSyncRaftEntry* pEntry = taosMemoryMalloc(pMsg->dataLen);
  if (pEntry == NULL) {
@@ -232,256 +193,3 @@ _IGNORE:
  rpcFreeCont(rpcRsp.pCont);
  return 0;
 }
-
-int32_t syncNodeOnAppendEntriesOld(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
-  SyncAppendEntries* pMsg = pRpcMsg->pCont;
-  SRpcMsg            rpcRsp = {0};
-
-  // if already drop replica, do not process
-  if (!syncNodeInRaftGroup(ths, &(pMsg->srcId))) {
-    syncLogRecvAppendEntries(ths, pMsg, "not in my config");
-    goto _IGNORE;
-  }
-
-  // prepare response msg
-  int32_t code = syncBuildAppendEntriesReply(&rpcRsp, ths->vgId);
-  if (code != 0) {
-    syncLogRecvAppendEntries(ths, pMsg, "build rsp error");
-    goto _IGNORE;
-  }
-
-  SyncAppendEntriesReply* pReply = rpcRsp.pCont;
-  pReply->srcId = ths->myRaftId;
-  pReply->destId = pMsg->srcId;
-  pReply->term = ths->raftStore.currentTerm;
-  pReply->success = false;
-  // pReply->matchIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore);
-  pReply->matchIndex = SYNC_INDEX_INVALID;
-  pReply->lastSendIndex = pMsg->prevLogIndex + 1;
-  pReply->startTime = ths->startTime;
-
-  if (pMsg->term < ths->raftStore.currentTerm) {
-    syncLogRecvAppendEntries(ths, pMsg, "reject, small term");
-    goto _SEND_RESPONSE;
-  }
-
-  if (pMsg->term > ths->raftStore.currentTerm) {
-    pReply->term = pMsg->term;
-  }
-
-  syncNodeStepDown(ths, pMsg->term);
-  syncNodeResetElectTimer(ths);
-
-  SyncIndex startIndex = ths->pLogStore->syncLogBeginIndex(ths->pLogStore);
-  SyncIndex lastIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore);
-
-  if (pMsg->prevLogIndex > lastIndex) {
-    syncLogRecvAppendEntries(ths, pMsg, "reject, index not match");
-    goto _SEND_RESPONSE;
-  }
-
-  if (pMsg->prevLogIndex >= startIndex) {
-    SyncTerm myPreLogTerm = syncNodeGetPreTerm(ths, pMsg->prevLogIndex + 1);
-    // ASSERT(myPreLogTerm != SYNC_TERM_INVALID);
-    if (myPreLogTerm == SYNC_TERM_INVALID) {
-      syncLogRecvAppendEntries(ths, pMsg, "reject, pre-term invalid");
-      goto _SEND_RESPONSE;
-    }
-
-    if (myPreLogTerm != pMsg->prevLogTerm) {
-      syncLogRecvAppendEntries(ths, pMsg, "reject, pre-term not match");
-      goto _SEND_RESPONSE;
-    }
-  }
-
-  // accept
-  pReply->success = true;
-  bool hasAppendEntries = pMsg->dataLen > 0;
-  if (hasAppendEntries) {
-    SSyncRaftEntry* pAppendEntry = syncEntryBuildFromAppendEntries(pMsg);
-    ASSERT(pAppendEntry != NULL);
-
-    SyncIndex appendIndex = pMsg->prevLogIndex + 1;
-
-    LRUHandle* hLocal = NULL;
-    LRUHandle* hAppend = NULL;
-
-    int32_t         code = 0;
-    SSyncRaftEntry* pLocalEntry = NULL;
-    SLRUCache*      pCache = ths->pLogStore->pCache;
-    hLocal = taosLRUCacheLookup(pCache, &appendIndex, sizeof(appendIndex));
-    if (hLocal) {
-      pLocalEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, hLocal);
-      code = 0;
-
-      ths->pLogStore->cacheHit++;
-      sNTrace(ths, "hit cache index:%" PRId64 ", bytes:%u, %p", appendIndex, pLocalEntry->bytes, pLocalEntry);
-
-    } else {
-      ths->pLogStore->cacheMiss++;
-      sNTrace(ths, "miss cache index:%" PRId64, appendIndex);
-
-      code = ths->pLogStore->syncLogGetEntry(ths->pLogStore, appendIndex, &pLocalEntry);
-    }
-
-    if (code == 0) {
-      // get local entry success
-
-      if (pLocalEntry->term == pAppendEntry->term) {
-        // do nothing
-        sNTrace(ths, "log match, do nothing, index:%" PRId64, appendIndex);
-
-      } else {
-        // truncate
-        code = ths->pLogStore->syncLogTruncate(ths->pLogStore, appendIndex);
-        if (code != 0) {
-          char logBuf[128];
-          snprintf(logBuf, sizeof(logBuf), "ignore, truncate error, append-index:%" PRId64, appendIndex);
-          syncLogRecvAppendEntries(ths, pMsg, logBuf);
-
-          if (hLocal) {
-            taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false);
-          } else {
-            syncEntryDestroy(pLocalEntry);
-          }
-
-          if (hAppend) {
-            taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false);
-          } else {
-            syncEntryDestroy(pAppendEntry);
-          }
-
-          goto _IGNORE;
-        }
-
-        ASSERT(pAppendEntry->index == appendIndex);
-
-        // append
-        code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry, false);
-        if (code != 0) {
-          char logBuf[128];
-          snprintf(logBuf, sizeof(logBuf), "ignore, append error, append-index:%" PRId64, appendIndex);
-          syncLogRecvAppendEntries(ths, pMsg, logBuf);
-
-          if (hLocal) {
-            taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false);
-          } else {
-            syncEntryDestroy(pLocalEntry);
-          }
-
-          if (hAppend) {
-            taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false);
-          } else {
-            syncEntryDestroy(pAppendEntry);
-          }
-
-          goto _IGNORE;
-        }
-
-        syncCacheEntry(ths->pLogStore, pAppendEntry, &hAppend);
-      }
-
-    } else {
-      if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) {
-        // log not exist
-
-        // truncate
-        code = ths->pLogStore->syncLogTruncate(ths->pLogStore, appendIndex);
-        if (code != 0) {
-          char logBuf[128];
-          snprintf(logBuf, sizeof(logBuf), "ignore, log not exist, truncate error, append-index:%" PRId64, appendIndex);
-          syncLogRecvAppendEntries(ths, pMsg, logBuf);
-
-          syncEntryDestroy(pLocalEntry);
-          syncEntryDestroy(pAppendEntry);
-          goto _IGNORE;
-        }
-
-        // append
-        code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry, false);
-        if (code != 0) {
-          char logBuf[128];
-          snprintf(logBuf, sizeof(logBuf), "ignore, log not exist, append error, append-index:%" PRId64, appendIndex);
-          syncLogRecvAppendEntries(ths, pMsg, logBuf);
-
-          if (hLocal) {
-            taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false);
-          } else {
-            syncEntryDestroy(pLocalEntry);
-          }
-
-          if (hAppend) {
-            taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false);
-          } else {
-            syncEntryDestroy(pAppendEntry);
-          }
-
-          goto _IGNORE;
-        }
-
-        syncCacheEntry(ths->pLogStore, pAppendEntry, &hAppend);
-
-      } else {
-        // get local entry success
-        char logBuf[128];
-        snprintf(logBuf, sizeof(logBuf), "ignore, get local entry error, append-index:%" PRId64 " err:%d", appendIndex,
-                 terrno);
-        syncLogRecvAppendEntries(ths, pMsg, logBuf);
-
-        if (hLocal) {
-          taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false);
-        } else {
-          syncEntryDestroy(pLocalEntry);
-        }
-
-        if (hAppend) {
-          taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false);
-        } else {
-          syncEntryDestroy(pAppendEntry);
-        }
-
-        goto _IGNORE;
-      }
-    }
-
-    // update match index
-    pReply->matchIndex = pAppendEntry->index;
-
-    if (hLocal) {
-      taosLRUCacheRelease(ths->pLogStore->pCache, hLocal, false);
-    } else {
-      syncEntryDestroy(pLocalEntry);
-    }
-
-    if (hAppend) {
-      taosLRUCacheRelease(ths->pLogStore->pCache, hAppend, false);
-    } else {
-      syncEntryDestroy(pAppendEntry);
-    }
-
-  } else {
-    // no append entries, do nothing
-    // maybe has extra entries, no harm
-
-    // update match index
-    pReply->matchIndex = pMsg->prevLogIndex;
-  }
-
-  // maybe update commit index, leader notice me
-  syncNodeFollowerCommit(ths, pMsg->commitIndex);
-
-  syncLogRecvAppendEntries(ths, pMsg, "accept");
-  goto _SEND_RESPONSE;
-
-_IGNORE:
-  rpcFreeCont(rpcRsp.pCont);
-  return 0;
-
-_SEND_RESPONSE:
-  // msg event log
-  syncLogSendAppendEntriesReply(ths, pReply, "");
-
-  // send response
-  syncNodeSendMsgById(&pReply->destId, ths, &rpcRsp);
-  return 0;
-}
--- a/source/libs/sync/src/syncAppendEntriesReply.c
+++ b/source/libs/sync/src/syncAppendEntriesReply.c
@@ -89,63 +89,3 @@ int32_t syncNodeOnAppendEntriesReply(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
  }
  return 0;
 }
-
-int32_t syncNodeOnAppendEntriesReplyOld(SSyncNode* ths, SyncAppendEntriesReply* pMsg) {
-  int32_t ret = 0;
-
-  // if already drop replica, do not process
-  if (!syncNodeInRaftGroup(ths, &(pMsg->srcId))) {
-    syncLogRecvAppendEntriesReply(ths, pMsg, "not in my config");
-    return 0;
-  }
-
-  // drop stale response
-  if (pMsg->term < ths->raftStore.currentTerm) {
-    syncLogRecvAppendEntriesReply(ths, pMsg, "drop stale response");
-    return 0;
-  }
-
-  if (ths->state == TAOS_SYNC_STATE_LEADER) {
-    if (pMsg->term > ths->raftStore.currentTerm) {
-      syncLogRecvAppendEntriesReply(ths, pMsg, "error term");
-      syncNodeStepDown(ths, pMsg->term);
-      return -1;
-    }
-
-    ASSERT(pMsg->term == ths->raftStore.currentTerm);
-
-    if (pMsg->success) {
-      SyncIndex oldMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId));
-      if (pMsg->matchIndex > oldMatchIndex) {
-        syncIndexMgrSetIndex(ths->pMatchIndex, &(pMsg->srcId), pMsg->matchIndex);
-        syncMaybeAdvanceCommitIndex(ths);
-
-        // maybe update minMatchIndex
-        ths->minMatchIndex = syncMinMatchIndex(ths);
-      }
-      syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), pMsg->matchIndex + 1);
-
-    } else {
-      SyncIndex nextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId));
-      if (nextIndex > SYNC_INDEX_BEGIN) {
-        --nextIndex;
-      }
-      syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), nextIndex);
-    }
-
-    // send next append entries
-    SPeerState* pState = syncNodeGetPeerState(ths, &(pMsg->srcId));
-    ASSERT(pState != NULL);
-
-    if (pMsg->lastSendIndex == pState->lastSendIndex) {
-      int64_t timeNow = taosGetTimestampMs();
-      int64_t elapsed = timeNow - pState->lastSendTime;
-      sNTrace(ths, "sync-append-entries rtt elapsed:%" PRId64 ", index:%" PRId64, elapsed, pState->lastSendIndex);
-
-      syncNodeReplicateOne(ths, &(pMsg->srcId), true);
-    }
-  }
-
-  syncLogRecvAppendEntriesReply(ths, pMsg, "process");
-  return 0;
-}
--- a/source/libs/sync/src/syncCommit.c
+++ b/source/libs/sync/src/syncCommit.c
@@ -43,148 +43,6 @@
 //        IN commitIndex' = [commitIndex EXCEPT ![i] = newCommitIndex]
 //     /\ UNCHANGED <<messages, serverVars, candidateVars, leaderVars, log>>
 //
-void syncOneReplicaAdvance(SSyncNode* pSyncNode) {
-  ASSERT(false && "deprecated");
-  if (pSyncNode == NULL) {
-    sError("pSyncNode is NULL");
-    return;
-  }
-
-  if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) {
-    sNError(pSyncNode, "not leader, can not advance commit index");
-    return;
-  }
-
-  if (pSyncNode->replicaNum != 1) {
-    sNError(pSyncNode, "not one replica, can not advance commit index");
-    return;
-  }
-
-  // advance commit index to snapshot first
-  SSnapshot snapshot;
-  pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapshot);
-  if (snapshot.lastApplyIndex > 0 && snapshot.lastApplyIndex > pSyncNode->commitIndex) {
-    SyncIndex commitBegin = pSyncNode->commitIndex;
-    SyncIndex commitEnd = snapshot.lastApplyIndex;
-    pSyncNode->commitIndex = snapshot.lastApplyIndex;
-    sNTrace(pSyncNode, "commit by snapshot from index:%" PRId64 " to index:%" PRId64, commitBegin, commitEnd);
-  }
-
-  // advance commit index as large as possible
-  SyncIndex lastIndex = syncNodeGetLastIndex(pSyncNode);
-  if (lastIndex > pSyncNode->commitIndex) {
-    sNTrace(pSyncNode, "commit by wal from index:%" PRId64 " to index:%" PRId64, pSyncNode->commitIndex + 1, lastIndex);
-    pSyncNode->commitIndex = lastIndex;
-  }
-
-  // call back Wal
-  SyncIndex walCommitVer = logStoreWalCommitVer(pSyncNode->pLogStore);
-  if (pSyncNode->commitIndex > walCommitVer) {
-    pSyncNode->pLogStore->syncLogUpdateCommitIndex(pSyncNode->pLogStore, pSyncNode->commitIndex);
-  }
-}
-
-void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) {
-  ASSERTS(false, "deprecated");
-  if (pSyncNode == NULL) {
-    sError("pSyncNode is NULL");
-    return;
-  }
-
-  if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) {
-    sNError(pSyncNode, "not leader, can not advance commit index");
-    return;
-  }
-
-  // advance commit index to sanpshot first
-  SSnapshot snapshot;
-  pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapshot);
-  if (snapshot.lastApplyIndex > 0 && snapshot.lastApplyIndex > pSyncNode->commitIndex) {
-    SyncIndex commitBegin = pSyncNode->commitIndex;
-    SyncIndex commitEnd = snapshot.lastApplyIndex;
-    pSyncNode->commitIndex = snapshot.lastApplyIndex;
-    sNTrace(pSyncNode, "commit by snapshot from index:%" PRId64 " to index:%" PRId64, commitBegin, commitEnd);
-  }
-
-  // update commit index
-  SyncIndex newCommitIndex = pSyncNode->commitIndex;
-  for (SyncIndex index = syncNodeGetLastIndex(pSyncNode); index > pSyncNode->commitIndex; --index) {
-    bool agree = syncAgree(pSyncNode, index);
-
-    if (agree) {
-      // term
-      SSyncRaftEntry* pEntry = NULL;
-      SLRUCache*      pCache = pSyncNode->pLogStore->pCache;
-      LRUHandle*      h = taosLRUCacheLookup(pCache, &index, sizeof(index));
-      if (h) {
-        pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h);
-
-        pSyncNode->pLogStore->cacheHit++;
-        sNTrace(pSyncNode, "hit cache index:%" PRId64 ", bytes:%u, %p", index, pEntry->bytes, pEntry);
-
-      } else {
-        pSyncNode->pLogStore->cacheMiss++;
-        sNTrace(pSyncNode, "miss cache index:%" PRId64, index);
-
-        int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, index, &pEntry);
-        if (code != 0) {
-          sNError(pSyncNode, "advance commit index error, read wal index:%" PRId64, index);
-          return;
-        }
-      }
-      // cannot commit, even if quorum agree. need check term!
-      if (pEntry->term <= pSyncNode->raftStore.currentTerm) {
-        // update commit index
-        newCommitIndex = index;
-
-        if (h) {
-          taosLRUCacheRelease(pCache, h, false);
-        } else {
-          syncEntryDestroy(pEntry);
-        }
-
-        break;
-      } else {
-        sNTrace(pSyncNode, "can not commit due to term not equal, index:%" PRId64 ", term:%" PRIu64, pEntry->index,
-                pEntry->term);
-      }
-
-      if (h) {
-        taosLRUCacheRelease(pCache, h, false);
-      } else {
-        syncEntryDestroy(pEntry);
-      }
-    }
-  }
-
-  // advance commit index as large as possible
-  SyncIndex walCommitVer = logStoreWalCommitVer(pSyncNode->pLogStore);
-  if (walCommitVer > newCommitIndex) {
-    newCommitIndex = walCommitVer;
-  }
-
-  // maybe execute fsm
-  if (newCommitIndex > pSyncNode->commitIndex) {
-    SyncIndex beginIndex = pSyncNode->commitIndex + 1;
-    SyncIndex endIndex = newCommitIndex;
-
-    // update commit index
-    pSyncNode->commitIndex = newCommitIndex;
-
-    // call back Wal
-    pSyncNode->pLogStore->syncLogUpdateCommitIndex(pSyncNode->pLogStore, pSyncNode->commitIndex);
-
-    // execute fsm
-    if (pSyncNode != NULL && pSyncNode->pFsm != NULL) {
-      int32_t code = syncNodeDoCommit(pSyncNode, beginIndex, endIndex, pSyncNode->state);
-      if (code != 0) {
-        sNError(pSyncNode, "advance commit index error, do commit begin:%" PRId64 ", end:%" PRId64, beginIndex,
-                endIndex);
-        return;
-      }
-    }
-  }
-}

 bool syncAgreeIndex(SSyncNode* pSyncNode, SRaftId* pRaftId, SyncIndex index) {
  // I am leader, I agree
@@ -210,83 +68,7 @@ static inline int64_t syncNodeAbs64(int64_t a, int64_t b) {
  return c;
 }

-int32_t syncNodeDynamicQuorum(const SSyncNode* pSyncNode) {
-  return pSyncNode->quorum;
-
-#if 0
-  int32_t quorum = 1;  // self
-
-  int64_t timeNow = taosGetTimestampMs();
-  for (int i = 0; i < pSyncNode->peersNum; ++i) {
-    int64_t   peerStartTime = syncIndexMgrGetStartTime(pSyncNode->pNextIndex, &(pSyncNode->peersId)[i]);
-    int64_t   peerRecvTime = syncIndexMgrGetRecvTime(pSyncNode->pNextIndex, &(pSyncNode->peersId)[i]);
-    SyncIndex peerMatchIndex = syncIndexMgrGetIndex(pSyncNode->pMatchIndex, &(pSyncNode->peersId)[i]);
-
-    int64_t recvTimeDiff = TABS(peerRecvTime - timeNow);
-    int64_t startTimeDiff = TABS(peerStartTime - pSyncNode->startTime);
-    int64_t logDiff = TABS(peerMatchIndex - syncNodeGetLastIndex(pSyncNode));
-
-    /*
-        int64_t recvTimeDiff = syncNodeAbs64(peerRecvTime, timeNow);
-        int64_t startTimeDiff = syncNodeAbs64(peerStartTime, pSyncNode->startTime);
-        int64_t logDiff = syncNodeAbs64(peerMatchIndex, syncNodeGetLastIndex(pSyncNode));
-    */
-
-    int32_t addQuorum = 0;
-
-    if (recvTimeDiff < SYNC_MAX_RECV_TIME_RANGE_MS) {
-      if (startTimeDiff < SYNC_MAX_START_TIME_RANGE_MS) {
-        addQuorum = 1;
-      } else {
-        if (logDiff < SYNC_ADD_QUORUM_COUNT) {
-          addQuorum = 1;
-        } else {
-          addQuorum = 0;
-        }
-      }
-    } else {
-      addQuorum = 0;
-    }
-
-    /*
-        if (recvTimeDiff < SYNC_MAX_RECV_TIME_RANGE_MS) {
-          addQuorum = 1;
-        } else {
-          addQuorum = 0;
-        }
-
-        if (startTimeDiff > SYNC_MAX_START_TIME_RANGE_MS) {
-          addQuorum = 0;
-        }
-    */
-
-    quorum += addQuorum;
-  }
-
-  ASSERT(quorum <= pSyncNode->replicaNum);
-
-  if (quorum < pSyncNode->quorum) {
-    quorum = pSyncNode->quorum;
-  }
-
-  return quorum;
-#endif
-}
-
-/*
-bool syncAgree(SSyncNode* pSyncNode, SyncIndex index) {
-  int agreeCount = 0;
-  for (int i = 0; i < pSyncNode->replicaNum; ++i) {
-    if (syncAgreeIndex(pSyncNode, &(pSyncNode->replicasId[i]), index)) {
-      ++agreeCount;
-    }
-    if (agreeCount >= syncNodeDynamicQuorum(pSyncNode)) {
-      return true;
-    }
-  }
-  return false;
-}
-*/
+int32_t syncNodeDynamicQuorum(const SSyncNode* pSyncNode) { return pSyncNode->quorum; }

 bool syncNodeAgreedUpon(SSyncNode* pNode, SyncIndex index) {
  int            count = 0;

--- a/source/libs/sync/src/syncElection.c
+++ b/source/libs/sync/src/syncElection.c
@@ -43,7 +43,10 @@ static int32_t syncNodeRequestVotePeers(SSyncNode* pNode) {
  for (int i = 0; i < pNode->peersNum; ++i) {
    SRpcMsg rpcMsg = {0};
    ret = syncBuildRequestVote(&rpcMsg, pNode->vgId);
-    ASSERT(ret == 0);
+    if (ret < 0) {
+      sError("vgId:%d, failed to build request-vote msg since %s", pNode->vgId, terrstr());
+      continue;
+    }

    SyncRequestVote* pMsg = rpcMsg.pCont;
    pMsg->srcId = pNode->myRaftId;
@@ -51,13 +54,18 @@ static int32_t syncNodeRequestVotePeers(SSyncNode* pNode) {
    pMsg->term = pNode->raftStore.currentTerm;

    ret = syncNodeGetLastIndexTerm(pNode, &pMsg->lastLogIndex, &pMsg->lastLogTerm);
-    ASSERT(ret == 0);
+    if (ret < 0) {
+      sError("vgId:%d, failed to get index and term of last log since %s", pNode->vgId, terrstr());
+      continue;
+    }

    ret = syncNodeSendMsgById(&pNode->peersId[i], pNode, &rpcMsg);
-    ASSERT(ret == 0);
+    if (ret < 0) {
+      sError("vgId:%d, failed to send msg to peerId:%" PRId64, pNode->vgId, pNode->peersId[i].addr);
+      continue;
+    }
  }
-
-  return ret;
+  return 0;
 }

 int32_t syncNodeElect(SSyncNode* pSyncNode) {

--- a/source/libs/sync/src/syncMain.c
+++ b/source/libs/sync/src/syncMain.c
@@ -292,8 +292,6 @@ int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex) {
    goto _DEL_WAL;

  } else {
-    lastApplyIndex -= SYNC_VNODE_LOG_RETENTION;
-
    SyncIndex beginIndex = pSyncNode->pLogStore->syncLogBeginIndex(pSyncNode->pLogStore);
    SyncIndex endIndex = pSyncNode->pLogStore->syncLogEndIndex(pSyncNode->pLogStore);
    bool      isEmpty = pSyncNode->pLogStore->syncLogIsEmpty(pSyncNode->pLogStore);
@@ -308,6 +306,8 @@ int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex) {
    if (pSyncNode->replicaNum > 1) {
      // multi replicas

+      lastApplyIndex = TMAX(lastApplyIndex - SYNC_VNODE_LOG_RETENTION, beginIndex - 1);
+
      if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) {
        pSyncNode->minMatchIndex = syncMinMatchIndex(pSyncNode);

@@ -586,78 +586,6 @@ SSyncState syncGetState(int64_t rid) {
  return state;
 }

-#if 0
-int32_t syncGetSnapshotByIndex(int64_t rid, SyncIndex index, SSnapshot* pSnapshot) {
-  if (index < SYNC_INDEX_BEGIN) {
-    return -1;
-  }
-
-  SSyncNode* pSyncNode = syncNodeAcquire(rid);
-  if (pSyncNode == NULL) {
-    return -1;
-  }
-  ASSERT(rid == pSyncNode->rid);
-
-  SSyncRaftEntry* pEntry = NULL;
-  int32_t         code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, index, &pEntry);
-  if (code != 0) {
-    if (pEntry != NULL) {
-      syncEntryDestroy(pEntry);
-    }
-    syncNodeRelease(pSyncNode);
-    return -1;
-  }
-  ASSERT(pEntry != NULL);
-
-  pSnapshot->data = NULL;
-  pSnapshot->lastApplyIndex = index;
-  pSnapshot->lastApplyTerm = pEntry->term;
-  pSnapshot->lastConfigIndex = syncNodeGetSnapshotConfigIndex(pSyncNode, index);
-
-  syncEntryDestroy(pEntry);
-  syncNodeRelease(pSyncNode);
-  return 0;
-}
-
-int32_t syncGetSnapshotMeta(int64_t rid, struct SSnapshotMeta* sMeta) {
-  SSyncNode* pSyncNode = syncNodeAcquire(rid);
-  if (pSyncNode == NULL) {
-    return -1;
-  }
-  ASSERT(rid == pSyncNode->rid);
-  sMeta->lastConfigIndex = pSyncNode->raftCfg.lastConfigIndex;
-
-  sTrace("vgId:%d, get snapshot meta, lastConfigIndex:%" PRId64, pSyncNode->vgId, pSyncNode->raftCfg.lastConfigIndex);
-
-  syncNodeRelease(pSyncNode);
-  return 0;
-}
-
-int32_t syncGetSnapshotMetaByIndex(int64_t rid, SyncIndex snapshotIndex, struct SSnapshotMeta* sMeta) {
-  SSyncNode* pSyncNode = syncNodeAcquire(rid);
-  if (pSyncNode == NULL) {
-    return -1;
-  }
-  ASSERT(rid == pSyncNode->rid);
-
-  ASSERT(pSyncNode->raftCfg.configIndexCount >= 1);
-  SyncIndex lastIndex = (pSyncNode->raftCfg.configIndexArr)[0];
-
-  for (int32_t i = 0; i < pSyncNode->raftCfg.configIndexCount; ++i) {
-    if ((pSyncNode->raftCfg.configIndexArr)[i] > lastIndex &&
-        (pSyncNode->raftCfg.configIndexArr)[i] <= snapshotIndex) {
-      lastIndex = (pSyncNode->raftCfg.configIndexArr)[i];
-    }
-  }
-  sMeta->lastConfigIndex = lastIndex;
-  sTrace("vgId:%d, get snapshot meta by index:%" PRId64 " lcindex:%" PRId64, pSyncNode->vgId, snapshotIndex,
-         sMeta->lastConfigIndex);
-
-  syncNodeRelease(pSyncNode);
-  return 0;
-}
-#endif
-
 SyncIndex syncNodeGetSnapshotConfigIndex(SSyncNode* pSyncNode, SyncIndex snapshotLastApplyIndex) {
  ASSERT(pSyncNode->raftCfg.configIndexCount >= 1);
  SyncIndex lastIndex = (pSyncNode->raftCfg.configIndexArr)[0];
@@ -1042,9 +970,12 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
  pSyncNode->commitIndex = commitIndex;
  sInfo("vgId:%d, sync node commitIndex initialized as %" PRId64, pSyncNode->vgId, pSyncNode->commitIndex);

+  // restore log store on need
  if (syncNodeLogStoreRestoreOnNeed(pSyncNode) < 0) {
+    sError("vgId:%d, failed to restore log store since %s.", pSyncNode->vgId, terrstr());
    goto _error;
  }
+
  // timer ms init
  pSyncNode->pingBaseLine = PING_TIMER_MS;
  pSyncNode->electBaseLine = tsElectInterval;
@@ -1107,10 +1038,16 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
  pSyncNode->changing = false;

  // replication mgr
-  syncNodeLogReplMgrInit(pSyncNode);
+  if (syncNodeLogReplMgrInit(pSyncNode) < 0) {
+    sError("vgId:%d, failed to init repl mgr since %s.", pSyncNode->vgId, terrstr());
+    goto _error;
+  }

  // peer state
-  syncNodePeerStateInit(pSyncNode);
+  if (syncNodePeerStateInit(pSyncNode) < 0) {
+    sError("vgId:%d, failed to init peer stat since %s.", pSyncNode->vgId, terrstr());
+    goto _error;
+  }

  //
  // min match index
@@ -1205,27 +1142,10 @@ int32_t syncNodeStart(SSyncNode* pSyncNode) {

  int32_t ret = 0;
  ret = syncNodeStartPingTimer(pSyncNode);
-  ASSERT(ret == 0);
-  return ret;
-}
-
-void syncNodeStartOld(SSyncNode* pSyncNode) {
-  // start raft
-  if (pSyncNode->replicaNum == 1) {
-    raftStoreNextTerm(pSyncNode);
-    syncNodeBecomeLeader(pSyncNode, "one replica start");
-
-    // Raft 3.6.2 Committing entries from previous terms
-    syncNodeAppendNoop(pSyncNode);
-    syncMaybeAdvanceCommitIndex(pSyncNode);
-
-  } else {
-    syncNodeBecomeFollower(pSyncNode, "first start");
+  if (ret != 0) {
+    sError("vgId:%d, failed to start ping timer since %s", pSyncNode->vgId, terrstr());
  }
-
-  int32_t ret = 0;
-  ret = syncNodeStartPingTimer(pSyncNode);
-  ASSERT(ret == 0);
+  return ret;
 }

 int32_t syncNodeStartStandBy(SSyncNode* pSyncNode) {
@@ -1236,11 +1156,16 @@ int32_t syncNodeStartStandBy(SSyncNode* pSyncNode) {
  // reset elect timer, long enough
  int32_t electMS = TIMER_MAX_MS;
  int32_t ret = syncNodeRestartElectTimer(pSyncNode, electMS);
-  ASSERT(ret == 0);
+  if (ret < 0) {
+    sError("vgId:%d, failed to restart elect timer since %s", pSyncNode->vgId, terrstr());
+    return -1;
+  }

-  ret = 0;
  ret = syncNodeStartPingTimer(pSyncNode);
-  ASSERT(ret == 0);
+  if (ret < 0) {
+    sError("vgId:%d, failed to start ping timer since %s", pSyncNode->vgId, terrstr());
+    return -1;
+  }
  return ret;
 }

@@ -1829,12 +1754,6 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) {
  pSyncNode->leaderCache = pSyncNode->myRaftId;

  for (int32_t i = 0; i < pSyncNode->pNextIndex->replicaNum; ++i) {
-    // maybe overwrite myself, no harm
-    // just do it!
-
-    // pSyncNode->pNextIndex->index[i] = pSyncNode->pLogStore->getLastIndex(pSyncNode->pLogStore) + 1;
-
-    // maybe wal is deleted
    SyncIndex lastIndex;
    SyncTerm  lastTerm;
    int32_t   code = syncNodeGetLastIndexTerm(pSyncNode, &lastIndex, &lastTerm);
@@ -1896,7 +1815,11 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) {

 void syncNodeCandidate2Leader(SSyncNode* pSyncNode) {
  ASSERT(pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE);
-  ASSERT(voteGrantedMajority(pSyncNode->pVotesGranted));
+  bool granted = voteGrantedMajority(pSyncNode->pVotesGranted);
+  if (!granted) {
+    sError("vgId:%d, not granted by majority.", pSyncNode->vgId);
+    return;
+  }
  syncNodeBecomeLeader(pSyncNode, "candidate to leader");

  sNTrace(pSyncNode, "state change syncNodeCandidate2Leader");
@@ -1912,20 +1835,6 @@ void syncNodeCandidate2Leader(SSyncNode* pSyncNode) {
        pSyncNode->vgId, pSyncNode->raftStore.currentTerm, pSyncNode->commitIndex, lastIndex);
 }

-void syncNodeCandidate2LeaderOld(SSyncNode* pSyncNode) {
-  ASSERT(pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE);
-  ASSERT(voteGrantedMajority(pSyncNode->pVotesGranted));
-  syncNodeBecomeLeader(pSyncNode, "candidate to leader");
-
-  // Raft 3.6.2 Committing entries from previous terms
-  syncNodeAppendNoop(pSyncNode);
-  syncMaybeAdvanceCommitIndex(pSyncNode);
-
-  if (pSyncNode->replicaNum > 1) {
-    syncNodeReplicate(pSyncNode);
-  }
-}
-
 bool syncNodeIsMnode(SSyncNode* pSyncNode) { return (pSyncNode->vgId == 1); }

 int32_t syncNodePeerStateInit(SSyncNode* pSyncNode) {
@@ -1971,7 +1880,8 @@ void syncNodeCandidate2Follower(SSyncNode* pSyncNode) {
 // need assert
 void syncNodeVoteForTerm(SSyncNode* pSyncNode, SyncTerm term, SRaftId* pRaftId) {
  ASSERT(term == pSyncNode->raftStore.currentTerm);
-  ASSERT(!raftStoreHasVoted(pSyncNode));
+  bool voted = raftStoreHasVoted(pSyncNode);
+  ASSERT(!voted);

  raftStoreVote(pSyncNode, pRaftId);
 }
@@ -2649,24 +2559,6 @@ int32_t syncNodeOnLocalCmd(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
  return 0;
 }

-int32_t syncNodeOnLocalCmdOld(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
-  ASSERT(false && "deprecated");
-  SyncLocalCmd* pMsg = pRpcMsg->pCont;
-  syncLogRecvLocalCmd(ths, pMsg, "");
-
-  if (pMsg->cmd == SYNC_LOCAL_CMD_STEP_DOWN) {
-    syncNodeStepDown(ths, pMsg->currentTerm);
-
-  } else if (pMsg->cmd == SYNC_LOCAL_CMD_FOLLOWER_CMT) {
-    syncNodeFollowerCommit(ths, pMsg->commitIndex);
-
-  } else {
-    sError("error local cmd");
-  }
-
-  return 0;
-}
-
 // TLA+ Spec
 // ClientRequest(i, v) ==
 //     /\ state[i] = Leader
@@ -2711,96 +2603,6 @@ int32_t syncNodeOnClientRequest(SSyncNode* ths, SRpcMsg* pMsg, SyncIndex* pRetIn
  }
 }

-int32_t syncNodeOnClientRequestOld(SSyncNode* ths, SRpcMsg* pMsg, SyncIndex* pRetIndex) {
-  sNTrace(ths, "on client request");
-
-  int32_t ret = 0;
-  int32_t code = 0;
-
-  SyncIndex       index = ths->pLogStore->syncLogWriteIndex(ths->pLogStore);
-  SyncTerm        term = ths->raftStore.currentTerm;
-  SSyncRaftEntry* pEntry;
-
-  if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) {
-    pEntry = syncEntryBuildFromClientRequest(pMsg->pCont, term, index);
-  } else {
-    pEntry = syncEntryBuildFromRpcMsg(pMsg, term, index);
-  }
-
-  LRUHandle* h = NULL;
-
-  if (ths->state == TAOS_SYNC_STATE_LEADER) {
-    // append entry
-    code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pEntry, false);
-    if (code != 0) {
-      if (ths->replicaNum == 1) {
-        if (h) {
-          taosLRUCacheRelease(ths->pLogStore->pCache, h, false);
-        } else {
-          syncEntryDestroy(pEntry);
-        }
-
-        return -1;
-
-      } else {
-        // del resp mgr, call FpCommitCb
-        SFsmCbMeta cbMeta = {
-            .index = pEntry->index,
-            .lastConfigIndex = SYNC_INDEX_INVALID,
-            .isWeak = pEntry->isWeak,
-            .code = -1,
-            .state = ths->state,
-            .seqNum = pEntry->seqNum,
-            .term = pEntry->term,
-            .currentTerm = ths->raftStore.currentTerm,
-            .flag = 0,
-        };
-        ths->pFsm->FpCommitCb(ths->pFsm, pMsg, &cbMeta);
-
-        if (h) {
-          taosLRUCacheRelease(ths->pLogStore->pCache, h, false);
-        } else {
-          syncEntryDestroy(pEntry);
-        }
-
-        return -1;
-      }
-    }
-
-    syncCacheEntry(ths->pLogStore, pEntry, &h);
-
-    // if mulit replica, start replicate right now
-    if (ths->replicaNum > 1) {
-      syncNodeReplicate(ths);
-    }
-
-    // if only myself, maybe commit right now
-    if (ths->replicaNum == 1) {
-      if (syncNodeIsMnode(ths)) {
-        syncMaybeAdvanceCommitIndex(ths);
-      } else {
-        syncOneReplicaAdvance(ths);
-      }
-    }
-  }
-
-  if (pRetIndex != NULL) {
-    if (ret == 0 && pEntry != NULL) {
-      *pRetIndex = pEntry->index;
-    } else {
-      *pRetIndex = SYNC_INDEX_INVALID;
-    }
-  }
-
-  if (h) {
-    taosLRUCacheRelease(ths->pLogStore->pCache, h, false);
-  } else {
-    syncEntryDestroy(pEntry);
-  }
-
-  return ret;
-}
-
 const char* syncStr(ESyncState state) {
  switch (state) {
    case TAOS_SYNC_STATE_FOLLOWER:
@@ -2905,129 +2707,6 @@ bool syncNodeIsOptimizedOneReplica(SSyncNode* ths, SRpcMsg* pMsg) {
  return (ths->replicaNum == 1 && syncUtilUserCommit(pMsg->msgType) && ths->vgId != 1);
 }

-int32_t syncNodeDoCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, uint64_t flag) {
-  ASSERT(false);
-  if (beginIndex > endIndex) {
-    return 0;
-  }
-
-  if (ths == NULL) {
-    return -1;
-  }
-
-  if (ths->pFsm != NULL && ths->pFsm->FpGetSnapshotInfo != NULL) {
-    // advance commit index to sanpshot first
-    SSnapshot snapshot = {0};
-    ths->pFsm->FpGetSnapshotInfo(ths->pFsm, &snapshot);
-    if (snapshot.lastApplyIndex >= 0 && snapshot.lastApplyIndex >= beginIndex) {
-      sNTrace(ths, "commit by snapshot from index:%" PRId64 " to index:%" PRId64, beginIndex, snapshot.lastApplyIndex);
-
-      // update begin index
-      beginIndex = snapshot.lastApplyIndex + 1;
-    }
-  }
-
-  int32_t    code = 0;
-  ESyncState state = flag;
-
-  sNTrace(ths, "commit by wal from index:%" PRId64 " to index:%" PRId64, beginIndex, endIndex);
-
-  // execute fsm
-  if (ths->pFsm != NULL) {
-    for (SyncIndex i = beginIndex; i <= endIndex; ++i) {
-      if (i != SYNC_INDEX_INVALID) {
-        SSyncRaftEntry* pEntry;
-        SLRUCache*      pCache = ths->pLogStore->pCache;
-        LRUHandle*      h = taosLRUCacheLookup(pCache, &i, sizeof(i));
-        if (h) {
-          pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h);
-
-          ths->pLogStore->cacheHit++;
-          sNTrace(ths, "hit cache index:%" PRId64 ", bytes:%u, %p", i, pEntry->bytes, pEntry);
-
-        } else {
-          ths->pLogStore->cacheMiss++;
-          sNTrace(ths, "miss cache index:%" PRId64, i);
-
-          code = ths->pLogStore->syncLogGetEntry(ths->pLogStore, i, &pEntry);
-          // ASSERT(code == 0);
-          // ASSERT(pEntry != NULL);
-          if (code != 0 || pEntry == NULL) {
-            sNError(ths, "get log entry error");
-            sFatal("vgId:%d, get log entry %" PRId64 " error when commit since %s", ths->vgId, i, terrstr());
-            continue;
-          }
-        }
-
-        SRpcMsg rpcMsg = {0};
-        syncEntry2OriginalRpc(pEntry, &rpcMsg);
-
-        sTrace("do commit index:%" PRId64 ", type:%s", i, TMSG_INFO(pEntry->msgType));
-
-        // user commit
-        if ((ths->pFsm->FpCommitCb != NULL) && syncUtilUserCommit(pEntry->originalRpcType)) {
-          bool internalExecute = true;
-          if ((ths->replicaNum == 1) && ths->restoreFinish && ths->vgId != 1) {
-            internalExecute = false;
-          }
-
-          sNTrace(ths, "user commit index:%" PRId64 ", internal:%d, type:%s", i, internalExecute,
-                  TMSG_INFO(pEntry->msgType));
-
-          // execute fsm in apply thread, or execute outside syncPropose
-          if (internalExecute) {
-            SFsmCbMeta cbMeta = {
-                .index = pEntry->index,
-                .lastConfigIndex = syncNodeGetSnapshotConfigIndex(ths, pEntry->index),
-                .isWeak = pEntry->isWeak,
-                .code = 0,
-                .state = ths->state,
-                .seqNum = pEntry->seqNum,
-                .term = pEntry->term,
-                .currentTerm = ths->raftStore.currentTerm,
-                .flag = flag,
-            };
-
-            syncRespMgrGetAndDel(ths->pSyncRespMgr, cbMeta.seqNum, &rpcMsg.info);
-            ths->pFsm->FpCommitCb(ths->pFsm, &rpcMsg, &cbMeta);
-          }
-        }
-
-#if 0
-        // execute in pre-commit
-        // leader transfer
-        if (pEntry->originalRpcType == TDMT_SYNC_LEADER_TRANSFER) {
-          code = syncDoLeaderTransfer(ths, &rpcMsg, pEntry);
-          ASSERT(code == 0);
-        }
-#endif
-
-        // restore finish
-        // if only snapshot, a noop entry will be append, so syncLogLastIndex is always ok
-        if (pEntry->index == ths->pLogStore->syncLogLastIndex(ths->pLogStore)) {
-          if (ths->restoreFinish == false) {
-            if (ths->pFsm->FpRestoreFinishCb != NULL) {
-              ths->pFsm->FpRestoreFinishCb(ths->pFsm);
-            }
-            ths->restoreFinish = true;
-
-            int64_t restoreDelay = taosGetTimestampMs() - ths->leaderTime;
-            sNTrace(ths, "restore finish, index:%" PRId64 ", elapsed:%" PRId64 " ms", pEntry->index, restoreDelay);
-          }
-        }
-
-        rpcFreeCont(rpcMsg.pCont);
-        if (h) {
-          taosLRUCacheRelease(pCache, h, false);
-        } else {
-          syncEntryDestroy(pEntry);
-        }
-      }
-    }
-  }
-  return 0;
-}
-
 bool syncNodeInRaftGroup(SSyncNode* ths, SRaftId* pRaftId) {
  for (int32_t i = 0; i < ths->replicaNum; ++i) {
    if (syncUtilSameId(&((ths->replicasId)[i]), pRaftId)) {

--- a/source/libs/sync/src/syncPipeline.c
+++ b/source/libs/sync/src/syncPipeline.c
@@ -945,8 +945,11 @@ int32_t syncNodeLogReplMgrInit(SSyncNode* pNode) {
  for (int i = 0; i < TSDB_MAX_REPLICA; i++) {
    ASSERT(pNode->logReplMgrs[i] == NULL);
    pNode->logReplMgrs[i] = syncLogReplMgrCreate();
+    if (pNode->logReplMgrs[i] == NULL) {
+      terrno = TSDB_CODE_OUT_OF_MEMORY;
+      return -1;
+    }
    pNode->logReplMgrs[i]->peerId = i;
-    ASSERTS(pNode->logReplMgrs[i] != NULL, "Out of memory.");
  }
  return 0;
 }

--- a/source/libs/sync/src/syncReplication.c
+++ b/source/libs/sync/src/syncReplication.c
@@ -48,92 +48,6 @@

 int32_t syncNodeMaybeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, SRpcMsg* pRpcMsg);

-int32_t syncNodeReplicateOne(SSyncNode* pSyncNode, SRaftId* pDestId, bool snapshot) {
-  ASSERT(false && "deprecated");
-  // next index
-  SyncIndex nextIndex = syncIndexMgrGetIndex(pSyncNode->pNextIndex, pDestId);
-
-  if (snapshot) {
-    // maybe start snapshot
-    SyncIndex logStartIndex = pSyncNode->pLogStore->syncLogBeginIndex(pSyncNode->pLogStore);
-    SyncIndex logEndIndex = pSyncNode->pLogStore->syncLogEndIndex(pSyncNode->pLogStore);
-    if (nextIndex < logStartIndex || nextIndex - 1 > logEndIndex) {
-      sNTrace(pSyncNode, "maybe start snapshot for next-index:%" PRId64 ", start:%" PRId64 ", end:%" PRId64, nextIndex,
-              logStartIndex, logEndIndex);
-      // start snapshot
-      int32_t code = syncNodeStartSnapshot(pSyncNode, pDestId);
-    }
-  }
-
-  // pre index, pre term
-  SyncIndex preLogIndex = syncNodeGetPreIndex(pSyncNode, nextIndex);
-  SyncTerm  preLogTerm = syncNodeGetPreTerm(pSyncNode, nextIndex);
-
-  // prepare entry
-  SRpcMsg            rpcMsg = {0};
-  SyncAppendEntries* pMsg = NULL;
-
-  SSyncRaftEntry* pEntry = NULL;
-  SLRUCache*      pCache = pSyncNode->pLogStore->pCache;
-  LRUHandle*      h = taosLRUCacheLookup(pCache, &nextIndex, sizeof(nextIndex));
-  int32_t         code = 0;
-  if (h) {
-    pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h);
-    code = 0;
-
-    pSyncNode->pLogStore->cacheHit++;
-    sNTrace(pSyncNode, "hit cache index:%" PRId64 ", bytes:%u, %p", nextIndex, pEntry->bytes, pEntry);
-
-  } else {
-    pSyncNode->pLogStore->cacheMiss++;
-    sNTrace(pSyncNode, "miss cache index:%" PRId64, nextIndex);
-
-    code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, nextIndex, &pEntry);
-  }
-
-  if (code == 0) {
-    ASSERT(pEntry != NULL);
-
-    code = syncBuildAppendEntries(&rpcMsg, (int32_t)(pEntry->bytes), pSyncNode->vgId);
-    ASSERT(code == 0);
-
-    pMsg = rpcMsg.pCont;
-    memcpy(pMsg->data, pEntry, pEntry->bytes);
-  } else {
-    if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) {
-      // no entry in log
-      code = syncBuildAppendEntries(&rpcMsg, 0, pSyncNode->vgId);
-      ASSERT(code == 0);
-
-      pMsg = rpcMsg.pCont;
-    } else {
-      sNError(pSyncNode, "replicate to dnode:%d error, next-index:%" PRId64, DID(pDestId), nextIndex);
-      return -1;
-    }
-  }
-
-  if (h) {
-    taosLRUCacheRelease(pCache, h, false);
-  } else {
-    syncEntryDestroy(pEntry);
-  }
-
-  // prepare msg
-  ASSERT(pMsg != NULL);
-  pMsg->srcId = pSyncNode->myRaftId;
-  pMsg->destId = *pDestId;
-  pMsg->term = pSyncNode->raftStore.currentTerm;
-  pMsg->prevLogIndex = preLogIndex;
-  pMsg->prevLogTerm = preLogTerm;
-  pMsg->commitIndex = pSyncNode->commitIndex;
-  pMsg->privateTerm = 0;
-  // pMsg->privateTerm = syncIndexMgrGetTerm(pSyncNode->pNextIndex, pDestId);
-
-  // send msg
-  syncNodeMaybeSendAppendEntries(pSyncNode, pDestId, &rpcMsg);
-  return 0;
-}
-
 int32_t syncNodeReplicate(SSyncNode* pNode) {
  SSyncLogBuffer* pBuf = pNode->pLogBuf;
  taosThreadMutexLock(&pBuf->mutex);
@@ -156,25 +70,6 @@ int32_t syncNodeReplicateWithoutLock(SSyncNode* pNode) {
  return 0;
 }

-int32_t syncNodeReplicateOld(SSyncNode* pSyncNode) {
-  if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) {
-    return -1;
-  }
-
-  sNTrace(pSyncNode, "do replicate");
-
-  int32_t ret = 0;
-  for (int i = 0; i < pSyncNode->peersNum; ++i) {
-    SRaftId* pDestId = &(pSyncNode->peersId[i]);
-    ret = syncNodeReplicateOne(pSyncNode, pDestId, true);
-    if (ret != 0) {
-      sError("vgId:%d, do append entries error for dnode:%d", pSyncNode->vgId, DID(pDestId));
-    }
-  }
-
-  return 0;
-}
-
 int32_t syncNodeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, SRpcMsg* pRpcMsg) {
  SyncAppendEntries* pMsg = pRpcMsg->pCont;
  pMsg->destId = *destRaftId;
@@ -182,39 +77,6 @@ int32_t syncNodeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftI
  return 0;
 }

-int32_t syncNodeSendAppendEntriesOld(SSyncNode* pSyncNode, const SRaftId* destRaftId, SRpcMsg* pRpcMsg) {
-  int32_t            ret = 0;
-  SyncAppendEntries* pMsg = pRpcMsg->pCont;
-  if (pMsg == NULL) {
-    sError("vgId:%d, sync-append-entries msg is NULL", pSyncNode->vgId);
-    return 0;
-  }
-
-  SPeerState* pState = syncNodeGetPeerState(pSyncNode, destRaftId);
-  if (pState == NULL) {
-    sError("vgId:%d, replica maybe dropped", pSyncNode->vgId);
-    return 0;
-  }
-
-  // save index, otherwise pMsg will be free by rpc
-  SyncIndex saveLastSendIndex = pState->lastSendIndex;
-  bool      update = false;
-  if (pMsg->dataLen > 0) {
-    saveLastSendIndex = pMsg->prevLogIndex + 1;
-    update = true;
-  }
-
-  syncLogSendAppendEntries(pSyncNode, pMsg, "");
-  syncNodeSendMsgById(destRaftId, pSyncNode, pRpcMsg);
-
-  if (update) {
-    pState->lastSendIndex = saveLastSendIndex;
-    pState->lastSendTime = taosGetTimestampMs();
-  }
-
-  return ret;
-}
-
 int32_t syncNodeMaybeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, SRpcMsg* pRpcMsg) {
  int32_t            ret = 0;
  SyncAppendEntries* pMsg = pRpcMsg->pCont;

--- a/source/libs/wal/src/walMeta.c
+++ b/source/libs/wal/src/walMeta.c
@@ -322,6 +322,55 @@ bool walLogEntriesComplete(const SWal* pWal) {
  return complete;
 }

+// Cut an idx file back to the length implied by its file info.
+//
+// The expected length is (lastVer - firstVer + 1) * sizeof(SWalIdxEntry), computed from the
+// SWalFileInfo at position fileIdx of pWal->fileInfoSet; a negative record count is clamped to 0.
+// Anything stored past that offset is truncated away.
+//
+// Returns 0 when there is nothing to trim (including a missing idx file) or the file was
+// truncated, and -1 with terrno set when the file cannot be stat'ed, opened or truncated.
+int walTrimIdxFile(SWal* pWal, int32_t fileIdx) {
+  SWalFileInfo* pFileInfo = taosArrayGet(pWal->fileInfoSet, fileIdx);
+  ASSERT(pFileInfo != NULL);
+  char fnameStr[WAL_FILE_LEN];
+  walBuildIdxName(pWal, pFileInfo->firstVer, fnameStr);
+
+  int64_t fileSize = 0;
+  if (taosStatFile(fnameStr, &fileSize, NULL) < 0) {
+    if (errno == ENOENT) {
+      return 0;  // no idx file on disk, so there is nothing to trim
+    }
+    terrno = TAOS_SYSTEM_ERROR(errno);
+    wError("vgId:%d, failed to stat file since %s. file:%s", pWal->cfg.vgId, terrstr(), fnameStr);
+    return -1;
+  }
+
+  int64_t records = TMAX(0, pFileInfo->lastVer - pFileInfo->firstVer + 1);
+  int64_t lastEndOffset = records * (int64_t)sizeof(SWalIdxEntry);
+  if (fileSize <= lastEndOffset) {
+    return 0;
+  }
+
+  TdFilePtr pFile = taosOpenFile(fnameStr, TD_FILE_READ | TD_FILE_WRITE);
+  if (pFile == NULL) {
+    terrno = TAOS_SYSTEM_ERROR(errno);
+    return -1;
+  }
+
+  wInfo("vgId:%d, trim idx file. file: %s, size: %" PRId64 ", offset: %" PRId64, pWal->cfg.vgId, fnameStr, fileSize,
+        lastEndOffset);
+
+  int32_t code = 0;
+  if (taosFtruncateFile(pFile, lastEndOffset) < 0) {
+    terrno = TAOS_SYSTEM_ERROR(errno);
+    wError("vgId:%d, failed to truncate file since %s. file:%s", pWal->cfg.vgId, terrstr(), fnameStr);
+    code = -1;
+  }
+  (void)taosCloseFile(&pFile);
+  return code;
+}
+
 int walCheckAndRepairMeta(SWal* pWal) {
  // load log files, get first/snapshot/last version info
  const char* logPattern = "^[0-9]+.log$";
@@ -396,6 +425,8 @@ int walCheckAndRepairMeta(SWal* pWal) {
    }
    updateMeta = true;

+    (void)walTrimIdxFile(pWal, fileIdx);
+
    int64_t lastVer = walScanLogGetLastVer(pWal, fileIdx);
    if (lastVer < 0) {
      if (terrno != TSDB_CODE_WAL_LOG_NOT_EXIST) {
@@ -558,6 +589,7 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) {
    goto _err;
  }

+  int64_t count = 0;
  while (idxEntry.ver < pFileInfo->lastVer) {
    /*A(idxEntry.ver == ckHead.head.version);*/

@@ -569,11 +601,11 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) {
             idxEntry.offset, fLogNameStr);
      goto _err;
    }
-    wWarn("vgId:%d, wal idx append new entry %" PRId64 " %" PRId64, pWal->cfg.vgId, idxEntry.ver, idxEntry.offset);
    if (taosWriteFile(pIdxFile, &idxEntry, sizeof(SWalIdxEntry)) < 0) {
      wError("vgId:%d, failed to append file since %s. file:%s", pWal->cfg.vgId, terrstr(), fnameStr);
      goto _err;
    }
+    count++;
  }

  if (taosFsyncFile(pIdxFile) < 0) {
@@ -581,6 +613,11 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) {
    goto _err;
  }

+  if (count > 0) {
+    wInfo("vgId:%d, rebuilt %" PRId64 " wal idx entries until lastVer: %" PRId64, pWal->cfg.vgId, count,
+          pFileInfo->lastVer);
+  }
+
  (void)taosCloseFile(&pLogFile);
  (void)taosCloseFile(&pIdxFile);
  return 0;

--- a/source/libs/wal/src/walRef.c
+++ b/source/libs/wal/src/walRef.c
@@ -77,6 +77,31 @@ void walUnrefVer(SWalRef *pRef) {
 }
 #endif

+SWalRef *walRefFirstVer(SWal *pWal, SWalRef *pRef) {  // make pRef reference the WAL's first version
+  if (pRef == NULL) {  // no ref passed in: open one on pWal
+    pRef = walOpenRef(pWal);
+    if (pRef == NULL) {
+      return NULL;  // walOpenRef() returned NULL
+    }
+  }
+  taosThreadMutexLock(&pWal->mutex);
+  // everything below, up to the unlock, runs with pWal->mutex held
+  int64_t ver = walGetFirstVer(pWal);
+
+  wDebug("vgId:%d, wal ref version %" PRId64 " for first", pWal->cfg.vgId, ver);
+
+  pRef->refVer = ver;
+  // bsearch in fileSet; only firstVer of the search key is initialised
+  SWalFileInfo tmpInfo;
+  tmpInfo.firstVer = ver;
+  SWalFileInfo *pRet = taosArraySearch(pWal->fileInfoSet, &tmpInfo, compareWalFileInfo, TD_LE);
+  ASSERT(pRet != NULL);  // NOTE(review): fires while pWal->mutex is still locked
+  pRef->refFile = pRet->firstVer;  // the referenced file is recorded by its firstVer
+
+  taosThreadMutexUnlock(&pWal->mutex);
+  return pRef;
+}
+
 SWalRef *walRefCommittedVer(SWal *pWal) {
  SWalRef *pRef = walOpenRef(pWal);
  if (pRef == NULL) {
@@ -87,6 +112,8 @@ SWalRef *walRefCommittedVer(SWal *pWal) {

  int64_t ver = walGetCommittedVer(pWal);

+  wDebug("vgId:%d, wal ref version %" PRId64 " for committed", pWal->cfg.vgId, ver);
+
  pRef->refVer = ver;
  // bsearch in fileSet
  SWalFileInfo tmpInfo;

--- a/source/os/CMakeLists.txt
+++ b/source/os/CMakeLists.txt
@@ -41,7 +41,7 @@ target_link_libraries(
 )
 if(TD_WINDOWS)
    target_link_libraries(
-        os PUBLIC ws2_32 iconv msvcregex wcwidth winmm crashdump dbghelp
+        os PUBLIC ws2_32 iconv msvcregex wcwidth winmm crashdump dbghelp version
    )
 elseif(TD_DARWIN_64)
    find_library(CORE_FOUNDATION_FRAMEWORK CoreFoundation)

--- a/source/os/src/osMath.c
+++ b/source/os/src/osMath.c
@@ -15,8 +15,8 @@

 #define ALLOW_FORBID_FUNC
 #define _DEFAULT_SOURCE
-#include "os.h"
 #include <stdlib.h>
+#include "os.h"

 #ifdef WINDOWS
 void swapStr(char* j, char* J, int width) {

--- a/source/os/src/osSysinfo.c
+++ b/source/os/src/osSysinfo.c
@@ -280,11 +280,63 @@ int32_t taosGetEmail(char *email, int32_t maxLen) {
 #endif
 }

+#ifdef WINDOWS
+// Build a "Windows <major>.<minor>" release name from the product version stored in the
+// version resource of explorer.exe under the Windows directory.
+//
+// releaseName/maxLen: output buffer and its capacity, handed to snprintf as-is.
+// Returns true when releaseName was written; false (buffer untouched) when the path does not
+// fit in MAX_PATH, memory cannot be allocated, or the version resource cannot be read.
+bool getWinVersionReleaseName(char *releaseName, int32_t maxLen) {
+  static const WCHAR explorerExe[] = L"\\explorer.exe";
+  const UINT         suffixLen = (UINT)(sizeof(explorerExe) / sizeof(explorerExe[0])) - 1;
+
+  // The wide (W) entry points are called explicitly so the L"..." literals match the API
+  // whether or not UNICODE is defined for this translation unit.
+  WCHAR szFileName[MAX_PATH];
+  UINT  dirLen = GetWindowsDirectoryW(szFileName, MAX_PATH);
+  if (dirLen == 0 || dirLen + suffixLen >= MAX_PATH) {
+    return false;
+  }

+  // Append the file name in place (terminating NUL included) instead of formatting the buffer
+  // into itself.
+  for (UINT i = 0; i <= suffixLen; ++i) {
+    szFileName[dirLen + i] = explorerExe[i];
+  }
+
+  DWORD dwHandle = 0;
+  DWORD dwLen = GetFileVersionInfoSizeW(szFileName, &dwHandle);
+  if (dwLen == 0) {
+    return false;
+  }
+
+  LPVOID lpData = malloc(dwLen);
+  if (lpData == NULL) {
+    return false;
+  }
+
+  bool              ok = false;
+  UINT              uLen = 0;
+  VS_FIXEDFILEINFO *pFileInfo = NULL;
+  if (GetFileVersionInfoW(szFileName, dwHandle, dwLen, lpData) &&
+      VerQueryValueW(lpData, L"\\", (LPVOID *)&pFileInfo, &uLen) && pFileInfo != NULL &&
+      uLen >= sizeof(VS_FIXEDFILEINFO)) {
+    snprintf(releaseName, maxLen, "Windows %d.%d", HIWORD(pFileInfo->dwProductVersionMS),
+             LOWORD(pFileInfo->dwProductVersionMS));
+    ok = true;
+  }
+
+  free(lpData);  // pFileInfo points into lpData, so free only after formatting
+  return ok;
+}
+#endif

 int32_t taosGetOsReleaseName(char *releaseName, int32_t maxLen) {
 #ifdef WINDOWS
-  snprintf(releaseName, maxLen, "Windows");
+  if (!getWinVersionReleaseName(releaseName, maxLen)) {
+    snprintf(releaseName, maxLen, "Windows");
+  }
  return 0;
 #elif defined(_TD_DARWIN_64)
  char osversion[32];

--- a/source/os/test/osTests.cpp
+++ b/source/os/test/osTests.cpp
@@ -34,6 +34,12 @@ TEST(osTest, osSystem) {
  ELogLevel   level = DEBUG_FATAL;
  int32_t     dflag = 255;  // tsLogEmbedded ? 255 : uDebugFlag
  taosPrintTrace(flags, level, dflag, 0);
+
+  const int sysLen = 64;
+  char      osSysName[sysLen];
+  int       ret = taosGetOsReleaseName(osSysName, sysLen);
+  printf("os systeme name:%s\n", osSysName);
+  ASSERT_EQ(ret, 0);
 }

 void fileOperateOnFree(void *param) {

--- a/source/util/src/talgo.c
+++ b/source/util/src/talgo.c
--- a/tests/docs-examples-test/python.sh
+++ b/tests/docs-examples-test/python.sh
--- a/tests/parallel_test/container_build.sh
+++ b/tests/parallel_test/container_build.sh
--- a/tests/script/tsim/sma/rsmaCreateInsertQuery.sim
+++ b/tests/script/tsim/sma/rsmaCreateInsertQuery.sim
--- a/tests/script/tsim/sma/rsmaPersistenceRecovery.sim
+++ b/tests/script/tsim/sma/rsmaPersistenceRecovery.sim
--- a/tests/script/tsim/sma/tsmaCreateInsertQuery.sim
+++ b/tests/script/tsim/sma/tsmaCreateInsertQuery.sim
--- a/tests/system-test/7-tmq/tmqUpdate-1ctb.py
+++ b/tests/system-test/7-tmq/tmqUpdate-1ctb.py
@@ -206,7 +206,7 @@ class TDTestCase:
        paraDict['rowsPerTbl'] = self.rowsPerTbl
        consumerId     = 1
        if self.snapshot == 0:
-            expectrowcnt   = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (2))
+            expectrowcnt   = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (1/2))
        elif self.snapshot == 1:
            expectrowcnt   = int(paraDict["rowsPerTbl"] * paraDict["ctbNum"] * (1))


--- a/tests/system-test/7-tmq/tmqUpdate-multiCtb-snapshot0.py
+++ b/tests/system-test/7-tmq/tmqUpdate-multiCtb-snapshot0.py