diff --git a/cmake/taostools_CMakeLists.txt.in b/cmake/taostools_CMakeLists.txt.in index ed2ec0b6da2fe8f49a1c215b5257980483fa6d33..db727907b79b33f82e8bb3a3369d32431c412a1e 100644 --- a/cmake/taostools_CMakeLists.txt.in +++ b/cmake/taostools_CMakeLists.txt.in @@ -2,7 +2,7 @@ # taos-tools ExternalProject_Add(taos-tools GIT_REPOSITORY https://github.com/taosdata/taos-tools.git - GIT_TAG d5df76d + GIT_TAG 823fae5 SOURCE_DIR "${TD_SOURCE_DIR}/tools/taos-tools" BINARY_DIR "" #BUILD_IN_SOURCE TRUE diff --git a/docs/en/07-develop/03-insert-data/20-kafka-writting.mdx b/docs/en/07-develop/03-insert-data/20-kafka-writting.mdx new file mode 100644 index 0000000000000000000000000000000000000000..ffb969a8a6d0bc61e23fee44f245a24236db5b1b --- /dev/null +++ b/docs/en/07-develop/03-insert-data/20-kafka-writting.mdx @@ -0,0 +1,46 @@ +--- +title: Write from Kafka +--- + +import Tabs from "@theme/Tabs"; +import TabItem from "@theme/TabItem"; +import PyKafka from "./_py_kafka.mdx"; + +## About Kafka + +Apache Kafka is an open-source distributed event streaming platform, used by thousands of companies for high-performance data pipelines, streaming analytics, data integration, and mission-critical applications. For the key concepts of kafka, please refer to [kafka documentation](https://kafka.apache.org/documentation/#gettingStarted). + +### kafka topic + +Messages in Kafka are organized by topics. A topic may have one or more partitions. We can manage kafka topics through `kafka-topics`. + +create a topic named `kafka-events`: + +``` +bin/kafka-topics.sh --create --topic kafka-events --bootstrap-server localhost:9092 +``` + +Alter `kafka-events` topic to set partitions to 3: + +``` +bin/kafka-topics.sh --alter --topic kafka-events --partitions 3 --bootstrap-server=localhost:9092 +``` + +Show all topics and partitions in Kafka: + +``` +bin/kafka-topics.sh --bootstrap-server=localhost:9092 --describe +``` + +## Insert into TDengine + +We can write data into TDengine via SQL or Schemaless. For more information, please refer to [Insert Using SQL](/develop/insert-data/sql-writing/) or [High Performance Writing](/develop/insert-data/high-volume/) or [Schemaless Writing](/reference/schemaless/). + +## Examples + + + + + + + diff --git a/docs/en/07-develop/03-insert-data/02-influxdb-line.mdx b/docs/en/07-develop/03-insert-data/30-influxdb-line.mdx similarity index 100% rename from docs/en/07-develop/03-insert-data/02-influxdb-line.mdx rename to docs/en/07-develop/03-insert-data/30-influxdb-line.mdx diff --git a/docs/en/07-develop/03-insert-data/03-opentsdb-telnet.mdx b/docs/en/07-develop/03-insert-data/40-opentsdb-telnet.mdx similarity index 100% rename from docs/en/07-develop/03-insert-data/03-opentsdb-telnet.mdx rename to docs/en/07-develop/03-insert-data/40-opentsdb-telnet.mdx diff --git a/docs/en/07-develop/03-insert-data/04-opentsdb-json.mdx b/docs/en/07-develop/03-insert-data/50-opentsdb-json.mdx similarity index 100% rename from docs/en/07-develop/03-insert-data/04-opentsdb-json.mdx rename to docs/en/07-develop/03-insert-data/50-opentsdb-json.mdx diff --git a/docs/en/07-develop/03-insert-data/05-high-volume.md b/docs/en/07-develop/03-insert-data/60-high-volume.md similarity index 100% rename from docs/en/07-develop/03-insert-data/05-high-volume.md rename to docs/en/07-develop/03-insert-data/60-high-volume.md diff --git a/docs/en/07-develop/03-insert-data/_py_kafka.mdx b/docs/en/07-develop/03-insert-data/_py_kafka.mdx new file mode 100644 index 0000000000000000000000000000000000000000..dc43a0d415275189220287278547b015865e8d90 --- /dev/null +++ b/docs/en/07-develop/03-insert-data/_py_kafka.mdx @@ -0,0 +1,60 @@ +### python Kafka 客户端 + +For python kafka client, please refer to [kafka client](https://cwiki.apache.org/confluence/display/KAFKA/Clients#Clients-Python). In this document, we use [kafka-python](http://github.com/dpkp/kafka-python). + +### consume from Kafka + +The simple way to consume messages from Kafka is to read messages one by one. The demo is as follows: + +``` +from kafka import KafkaConsumer +consumer = KafkaConsumer('my_favorite_topic') +for msg in consumer: + print (msg) +``` + +For higher performance, we can consume message from kafka in batch. The demo is as follows: + +``` +from kafka import KafkaConsumer +consumer = KafkaConsumer('my_favorite_topic') +while True: + msgs = consumer.poll(timeout_ms=500, max_records=1000) + if msgs: + print (msgs) +``` + +### multi-threading + +For more higher performance we can process data from kafka in multi-thread. We can use python's ThreadPoolExecutor to achieve multithreading. The demo is as follows: + +``` +from concurrent.futures import ThreadPoolExecutor, Future +pool = ThreadPoolExecutor(max_workers=10) +pool.submit(...) +``` + +### multi-process + +For more higher performance, sometimes we use multiprocessing. In this case, the number of Kafka Consumers should not be greater than the number of Kafka Topic Partitions. The demo is as follows: + +``` +from multiprocessing import Process + +ps = [] +for i in range(5): + p = Process(target=Consumer().consume()) + p.start() + ps.append(p) + +for p in ps: + p.join() +``` + +In addition to python's built-in multithreading and multiprocessing library, we can also use the third-party library gunicorn. + +### Examples + +```py +{{#include docs/examples/python/kafka_example.py}} +``` diff --git a/docs/en/20-third-party/01-grafana.mdx b/docs/en/20-third-party/01-grafana.mdx index e0fbefd5a8634d2001f2cc0601afa110aff33632..ca32ce8afce30eaa6756f7b403685b989d824bbf 100644 --- a/docs/en/20-third-party/01-grafana.mdx +++ b/docs/en/20-third-party/01-grafana.mdx @@ -76,7 +76,7 @@ sudo -u grafana grafana-cli plugins install tdengine-datasource You can also download zip files from [GitHub](https://github.com/taosdata/grafanaplugin/releases/tag/latest) or [Grafana](https://grafana.com/grafana/plugins/tdengine-datasource/?tab=installation) and install manually. The commands are as follows: ```bash -GF_VERSION=3.2.2 +GF_VERSION=3.2.7 # from GitHub wget https://github.com/taosdata/grafanaplugin/releases/download/v$GF_VERSION/tdengine-datasource-$GF_VERSION.zip # from Grafana diff --git a/docs/en/25-application/01-telegraf.md b/docs/en/25-application/01-telegraf.md index f7003264496e61f33e843a4c8f2ec8227ba571b6..65fb08ee674cc9c952bbcfda57f0366fa129fdf2 100644 --- a/docs/en/25-application/01-telegraf.md +++ b/docs/en/25-application/01-telegraf.md @@ -38,15 +38,9 @@ Download the latest TDengine-server from the [Downloads](http://tdengine.com/en/ ## Data Connection Setup -### Download TDengine plug-in to grafana plug-in directory +### Install Grafana Plugin and Configure Data Source -```bash -1. wget -c https://github.com/taosdata/grafanaplugin/releases/download/v3.1.3/tdengine-datasource-3.1.3.zip -2. sudo unzip tdengine-datasource-3.1.3.zip -d /var/lib/grafana/plugins/ -3. sudo chown grafana:grafana -R /var/lib/grafana/plugins/tdengine -4. echo -e "[plugins]\nallow_loading_unsigned_plugins = tdengine-datasource\n" | sudo tee -a /etc/grafana/grafana.ini -5. sudo systemctl restart grafana-server.service -``` +Please refer to [Install Grafana Plugin and Configure Data Source](/third-party/grafana/#install-grafana-plugin-and-configure-data-source) ### Modify /etc/telegraf/telegraf.conf diff --git a/docs/en/25-application/02-collectd.md b/docs/en/25-application/02-collectd.md index 692cd8d929f04a03e4433bd0b71f84101bc362cb..97412b230941435a08df12c859d63bca68b79ec9 100644 --- a/docs/en/25-application/02-collectd.md +++ b/docs/en/25-application/02-collectd.md @@ -41,15 +41,9 @@ Download the latest TDengine-server from the [Downloads](http://tdengine.com/en/ ## Data Connection Setup -### Copy the TDengine plugin to the grafana plugin directory - -```bash -1. wget -c https://github.com/taosdata/grafanaplugin/releases/download/v3.1.3/tdengine-datasource-3.1.3.zip -2. sudo unzip tdengine-datasource-3.1.3.zip -d /var/lib/grafana/plugins/ -3. sudo chown grafana:grafana -R /var/lib/grafana/plugins/tdengine -4. echo -e "[plugins]\nallow_loading_unsigned_plugins = tdengine-datasource\n" | sudo tee -a /etc/grafana/grafana.ini -5. sudo systemctl restart grafana-server.service -``` +### Install Grafana Plugin and Configure Data Source + +Please refer to [Install Grafana Plugin and Configure Data Source](/third-party/grafana/#install-grafana-plugin-and-configure-data-source) ### Configure collectd diff --git a/docs/examples/python/kafka_example.py b/docs/examples/python/kafka_example.py new file mode 100644 index 0000000000000000000000000000000000000000..735059eec0f3dcf5094810916e66a39db5682560 --- /dev/null +++ b/docs/examples/python/kafka_example.py @@ -0,0 +1,192 @@ +#! encoding = utf-8 +import json +import time +from json import JSONDecodeError +from typing import Callable +import logging +from concurrent.futures import ThreadPoolExecutor, Future + +import taos +from kafka import KafkaConsumer +from kafka.consumer.fetcher import ConsumerRecord + + +class Consumer(object): + DEFAULT_CONFIGS = { + 'kafka_brokers': 'localhost:9092', + 'kafka_topic': 'python_kafka', + 'kafka_group_id': 'taos', + 'taos_host': 'localhost', + 'taos_user': 'root', + 'taos_password': 'taosdata', + 'taos_database': 'power', + 'taos_port': 6030, + 'timezone': None, + 'clean_after_testing': False, + 'bath_consume': True, + 'batch_size': 1000, + 'async_model': True, + 'workers': 10 + } + + LOCATIONS = ['California.SanFrancisco', 'California.LosAngles', 'California.SanDiego', 'California.SanJose', + 'California.PaloAlto', 'California.Campbell', 'California.MountainView', 'California.Sunnyvale', + 'California.SantaClara', 'California.Cupertino'] + + CREATE_DATABASE_SQL = 'create database if not exists {} keep 365 duration 10 buffer 16 wal_level 1' + USE_DATABASE_SQL = 'use {}' + DROP_TABLE_SQL = 'drop table if exists meters' + DROP_DATABASE_SQL = 'drop database if exists {}' + CREATE_STABLE_SQL = 'create stable meters (ts timestamp, current float, voltage int, phase float) ' \ + 'tags (location binary(64), groupId int)' + CREATE_TABLE_SQL = 'create table if not exists {} using meters tags (\'{}\', {})' + INSERT_SQL_HEADER = "insert into " + INSERT_PART_SQL = 'power.{} values (\'{}\', {}, {}, {})' + + def __init__(self, **configs): + self.config: dict = self.DEFAULT_CONFIGS + self.config.update(configs) + self.consumer = KafkaConsumer( + self.config.get('kafka_topic'), # topic + bootstrap_servers=self.config.get('kafka_brokers'), + group_id=self.config.get('kafka_group_id'), + ) + self.taos = taos.connect( + host=self.config.get('taos_host'), + user=self.config.get('taos_user'), + password=self.config.get('taos_password'), + port=self.config.get('taos_port'), + timezone=self.config.get('timezone'), + ) + if self.config.get('async_model'): + self.pool = ThreadPoolExecutor(max_workers=self.config.get('workers')) + self.tasks: list[Future] = [] + # tags and table mapping # key: {location}_{groupId} value: + self.tag_table_mapping = {} + i = 0 + for location in self.LOCATIONS: + for j in range(1, 11): + table_name = 'd{}'.format(i) + self._cache_table(location=location, group_id=j, table_name=table_name) + i += 1 + + def init_env(self): + # create database and table + self.taos.execute(self.DROP_DATABASE_SQL.format(self.config.get('taos_database'))) + self.taos.execute(self.CREATE_DATABASE_SQL.format(self.config.get('taos_database'))) + self.taos.execute(self.USE_DATABASE_SQL.format(self.config.get('taos_database'))) + self.taos.execute(self.DROP_TABLE_SQL) + self.taos.execute(self.CREATE_STABLE_SQL) + for tags, table_name in self.tag_table_mapping.items(): + location, group_id = _get_location_and_group(tags) + self.taos.execute(self.CREATE_TABLE_SQL.format(table_name, location, group_id)) + + def consume(self): + logging.warning('## start consumer topic-[%s]', self.config.get('kafka_topic')) + try: + if self.config.get('bath_consume'): + self._run_batch(self._to_taos_batch) + else: + self._run(self._to_taos) + except KeyboardInterrupt: + logging.warning("## caught keyboard interrupt, stopping") + finally: + self.stop() + + def stop(self): + # close consumer + if self.consumer is not None: + self.consumer.commit() + self.consumer.close() + + # multi thread + if self.config.get('async_model'): + for task in self.tasks: + while not task.done(): + pass + if self.pool is not None: + self.pool.shutdown() + + # clean data + if self.config.get('clean_after_testing'): + self.taos.execute(self.DROP_TABLE_SQL) + self.taos.execute(self.DROP_DATABASE_SQL.format(self.config.get('taos_database'))) + # close taos + if self.taos is not None: + self.taos.close() + + def _run(self, f: Callable[[ConsumerRecord], bool]): + for message in self.consumer: + if self.config.get('async_model'): + self.pool.submit(f(message)) + else: + f(message) + + def _run_batch(self, f: Callable[[list[list[ConsumerRecord]]], None]): + while True: + messages = self.consumer.poll(timeout_ms=500, max_records=self.config.get('batch_size')) + if messages: + if self.config.get('async_model'): + self.pool.submit(f, messages.values()) + else: + f(list(messages.values())) + if not messages: + time.sleep(0.1) + + def _to_taos(self, message: ConsumerRecord) -> bool: + sql = self.INSERT_SQL_HEADER + self._build_sql(message.value) + if len(sql) == 0: # decode error, skip + return True + logging.info('## insert sql %s', sql) + return self.taos.execute(sql=sql) == 1 + + def _to_taos_batch(self, messages: list[list[ConsumerRecord]]): + sql = self._build_sql_batch(messages=messages) + if len(sql) == 0: # decode error, skip + return + self.taos.execute(sql=sql) + + def _build_sql(self, msg_value: str) -> str: + try: + data = json.loads(msg_value) + except JSONDecodeError as e: + logging.error('## decode message [%s] error ', msg_value, e) + return '' + location = data.get('location') + group_id = data.get('groupId') + ts = data.get('ts') + current = data.get('current') + voltage = data.get('voltage') + phase = data.get('phase') + + table_name = self._get_table_name(location=location, group_id=group_id) + return self.INSERT_PART_SQL.format(table_name, ts, current, voltage, phase) + + def _build_sql_batch(self, messages: list[list[ConsumerRecord]]) -> str: + sql_list = [] + for partition_messages in messages: + for message in partition_messages: + sql_list.append(self._build_sql(message.value)) + + return self.INSERT_SQL_HEADER + ' '.join(sql_list) + + def _cache_table(self, location: str, group_id: int, table_name: str): + self.tag_table_mapping[_tag_table_mapping_key(location=location, group_id=group_id)] = table_name + + def _get_table_name(self, location: str, group_id: int) -> str: + return self.tag_table_mapping.get(_tag_table_mapping_key(location=location, group_id=group_id)) + + +def _tag_table_mapping_key(location: str, group_id: int): + return '{}_{}'.format(location, group_id) + + +def _get_location_and_group(key: str) -> (str, int): + fields = key.split('_') + return fields[0], fields[1] + + +if __name__ == '__main__': + consumer = Consumer(async_model=True) + consumer.init_env() + consumer.consume() \ No newline at end of file diff --git a/docs/zh/07-develop/03-insert-data/20-kafka-writting.mdx b/docs/zh/07-develop/03-insert-data/20-kafka-writting.mdx new file mode 100644 index 0000000000000000000000000000000000000000..32d3c2e5cbe5a167e4d7cb459ace8259c407b693 --- /dev/null +++ b/docs/zh/07-develop/03-insert-data/20-kafka-writting.mdx @@ -0,0 +1,47 @@ +--- +title: 从 Kafka 写入 +--- + +import Tabs from "@theme/Tabs"; +import TabItem from "@theme/TabItem"; +import PyKafka from "./_py_kafka.mdx"; + +## Kafka 介绍 + +Apache Kafka 是开源的分布式消息分发平台,被广泛应用于高性能数据管道、流式数据分析、数据集成和事件驱动类型的应用程序。Kafka 包含 Producer、Consumer 和 Topic,其中 Producer 是向 Kafka 发送消息的进程,Consumer 是从 Kafka 消费消息的进程。Kafka 相关概念可以参考[官方文档](https://kafka.apache.org/documentation/#gettingStarted)。 + + +### kafka topic + +Kafka 的消息按 topic 组织,每个 topic 会有一到多个 partition。可以通过 kafka 的 `kafka-topics` 管理 topic。 + +创建名为 `kafka-events` 的topic: + +``` +bin/kafka-topics.sh --create --topic kafka-events --bootstrap-server localhost:9092 +``` + +修改 `kafka-events` 的 partition 数量为 3: + +``` +bin/kafka-topics.sh --alter --topic kafka-events --partitions 3 --bootstrap-server=localhost:9092 +``` + +展示所有的 topic 和 partition: + +``` +bin/kafka-topics.sh --bootstrap-server=localhost:9092 --describe +``` + +## 写入 TDengine + +TDengine 支持 Sql 方式和 Schemaless 方式的数据写入,Sql 方式数据写入可以参考 [TDengine SQL 写入](/develop/insert-data/sql-writing/) 和 [TDengine 高效写入](/develop/insert-data/high-volume/)。Schemaless 方式数据写入可以参考 [TDengine Schemaless 写入](/reference/schemaless/) 文档。 + +## 示例代码 + + + + + + + diff --git a/docs/zh/07-develop/03-insert-data/02-influxdb-line.mdx b/docs/zh/07-develop/03-insert-data/30-influxdb-line.mdx similarity index 100% rename from docs/zh/07-develop/03-insert-data/02-influxdb-line.mdx rename to docs/zh/07-develop/03-insert-data/30-influxdb-line.mdx diff --git a/docs/zh/07-develop/03-insert-data/03-opentsdb-telnet.mdx b/docs/zh/07-develop/03-insert-data/40-opentsdb-telnet.mdx similarity index 100% rename from docs/zh/07-develop/03-insert-data/03-opentsdb-telnet.mdx rename to docs/zh/07-develop/03-insert-data/40-opentsdb-telnet.mdx diff --git a/docs/zh/07-develop/03-insert-data/04-opentsdb-json.mdx b/docs/zh/07-develop/03-insert-data/50-opentsdb-json.mdx similarity index 100% rename from docs/zh/07-develop/03-insert-data/04-opentsdb-json.mdx rename to docs/zh/07-develop/03-insert-data/50-opentsdb-json.mdx diff --git a/docs/zh/07-develop/03-insert-data/05-high-volume.md b/docs/zh/07-develop/03-insert-data/60-high-volume.md similarity index 100% rename from docs/zh/07-develop/03-insert-data/05-high-volume.md rename to docs/zh/07-develop/03-insert-data/60-high-volume.md diff --git a/docs/zh/07-develop/03-insert-data/_py_kafka.mdx b/docs/zh/07-develop/03-insert-data/_py_kafka.mdx new file mode 100644 index 0000000000000000000000000000000000000000..cd7edf557d4ae41df0f55f4456fe405515132e51 --- /dev/null +++ b/docs/zh/07-develop/03-insert-data/_py_kafka.mdx @@ -0,0 +1,60 @@ +### python Kafka 客户端 + +Kafka 的 python 客户端可以参考文档 [kafka client](https://cwiki.apache.org/confluence/display/KAFKA/Clients#Clients-Python)。推荐使用 [confluent-kafka-python](https://github.com/confluentinc/confluent-kafka-python) 和 [kafka-python](http://github.com/dpkp/kafka-python)。以下示例以 [kafka-python](http://github.com/dpkp/kafka-python) 为例。 + +### 从 Kafka 消费数据 + +Kafka 客户端采用 pull 的方式从 Kafka 消费数据,可以采用单条消费的方式或批量消费的方式读取数据。使用 [kafka-python](http://github.com/dpkp/kafka-python) 客户端单条消费数据的示例如下: + +``` +from kafka import KafkaConsumer +consumer = KafkaConsumer('my_favorite_topic') +for msg in consumer: + print (msg) +``` + +单条消费的方式在数据流量大的情况下往往存在性能瓶颈,导致 Kafka 消息积压,更推荐使用批量消费的方式消费数据。使用 [kafka-python](http://github.com/dpkp/kafka-python) 客户端批量消费数据的示例如下: + +``` +from kafka import KafkaConsumer +consumer = KafkaConsumer('my_favorite_topic') +while True: + msgs = consumer.poll(timeout_ms=500, max_records=1000) + if msgs: + print (msgs) +``` + +### Python 多线程 + +为了提高数据写入效率,通常采用多线程的方式写入数据,可以使用 python 线程池 ThreadPoolExecutor 实现多线程。示例代码如下: + +``` +from concurrent.futures import ThreadPoolExecutor, Future +pool = ThreadPoolExecutor(max_workers=10) +pool.submit(...) +``` + +### Python 多进程 + +单个python进程不能充分发挥多核 CPU 的性能,有时候我们会选择多进程的方式。在多进程的情况下,需要注意,Kafka Consumer 的数量应该小于等于 Kafka Topic Partition 数量。Python 多进程示例代码如下: + +``` +from multiprocessing import Process + +ps = [] +for i in range(5): + p = Process(target=Consumer().consume()) + p.start() + ps.append(p) + +for p in ps: + p.join() +``` + +除了 Python 内置的多线程和多进程方式,还可以通过第三方库 gunicorn 实现并发。 + +### 完整示例 + +```py +{{#include docs/examples/python/kafka_example.py}} +``` diff --git a/docs/zh/25-application/01-telegraf.md b/docs/zh/25-application/01-telegraf.md index 6338264d171b9246b2e99d418035e061d1068a4b..ec263d77927d2960ef684d9d94c9cad4073429b9 100644 --- a/docs/zh/25-application/01-telegraf.md +++ b/docs/zh/25-application/01-telegraf.md @@ -39,15 +39,9 @@ IT 运维监测数据通常都是对时间特性比较敏感的数据,例如 ## 数据链路设置 -### 下载 TDengine 插件到 Grafana 插件目录 +### 安装 Grafana Plugin 并配置数据源 -```bash -1. wget -c https://github.com/taosdata/grafanaplugin/releases/download/v3.1.3/tdengine-datasource-3.1.3.zip -2. sudo unzip tdengine-datasource-3.1.3.zip -d /var/lib/grafana/plugins/ -3. sudo chown grafana:grafana -R /var/lib/grafana/plugins/tdengine -4. echo -e "[plugins]\nallow_loading_unsigned_plugins = tdengine-datasource\n" | sudo tee -a /etc/grafana/grafana.ini -5. sudo systemctl restart grafana-server.service -``` +请参考[安装 Grafana Plugin 并配置数据源](/third-party/grafana/#%E5%AE%89%E8%A3%85-grafana-plugin-%E5%B9%B6%E9%85%8D%E7%BD%AE%E6%95%B0%E6%8D%AE%E6%BA%90)。 ### 修改 /etc/telegraf/telegraf.conf diff --git a/docs/zh/25-application/02-collectd.md b/docs/zh/25-application/02-collectd.md index c6230f48abb545e3064f406d9005a4a3ba8ea5ba..8b39a6431ddeebae2179f4dc535cabe841a499a1 100644 --- a/docs/zh/25-application/02-collectd.md +++ b/docs/zh/25-application/02-collectd.md @@ -41,15 +41,9 @@ IT 运维监测数据通常都是对时间特性比较敏感的数据,例如 ## 数据链路设置 -### 复制 TDengine 插件到 grafana 插件目录 - -```bash -1. wget -c https://github.com/taosdata/grafanaplugin/releases/download/v3.1.3/tdengine-datasource-3.1.3.zip -2. sudo unzip tdengine-datasource-3.1.3.zip -d /var/lib/grafana/plugins/ -3. sudo chown grafana:grafana -R /var/lib/grafana/plugins/tdengine -4. echo -e "[plugins]\nallow_loading_unsigned_plugins = tdengine-datasource\n" | sudo tee -a /etc/grafana/grafana.ini -5. sudo systemctl restart grafana-server.service -``` +### 安装 Grafana Plugin 并配置数据源 + +请参考[安装 Grafana Plugin 并配置数据源](/third-party/grafana/#%E5%AE%89%E8%A3%85-grafana-plugin-%E5%B9%B6%E9%85%8D%E7%BD%AE%E6%95%B0%E6%8D%AE%E6%BA%90)。 ### 配置 collectd diff --git a/include/common/tglobal.h b/include/common/tglobal.h index 89ec9dc6c833a7173d83c67dc386a476ff5bf4ef..005cf36d5ea11e8adaeb61a95a9536c1af746982 100644 --- a/include/common/tglobal.h +++ b/include/common/tglobal.h @@ -137,6 +137,7 @@ extern int64_t tsWalFsyncDataSizeLimit; // internal extern int32_t tsTransPullupInterval; extern int32_t tsMqRebalanceInterval; +extern int32_t tsStreamCheckpointTickInterval; extern int32_t tsTtlUnit; extern int32_t tsTtlPushInterval; extern int32_t tsGrantHBInterval; diff --git a/include/common/tmsg.h b/include/common/tmsg.h index cf9a27b41be53b919985ea2d31ebb4e6a32bdf8e..0a082a37e408ee1807272d0232849329b7d46603 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -1148,6 +1148,13 @@ typedef struct { int32_t tSerializeSMTimerMsg(void* buf, int32_t bufLen, SMTimerReq* pReq); int32_t tDeserializeSMTimerMsg(void* buf, int32_t bufLen, SMTimerReq* pReq); +typedef struct { + int64_t tick; +} SMStreamTickReq; + +int32_t tSerializeSMStreamTickMsg(void* buf, int32_t bufLen, SMStreamTickReq* pReq); +int32_t tDeserializeSMStreamTickMsg(void* buf, int32_t bufLen, SMStreamTickReq* pReq); + typedef struct { int32_t id; uint16_t port; // node sync Port @@ -1748,6 +1755,8 @@ typedef struct { int64_t watermark; int32_t numOfTags; SArray* pTags; // array of SField + // 3.0.20 + int64_t checkpointFreq; // ms } SCMCreateStreamReq; typedef struct { @@ -1947,6 +1956,12 @@ typedef struct { SHashObj* rebSubHash; // SHashObj } SMqDoRebalanceMsg; +typedef struct { + int64_t streamId; + int64_t checkpointId; + char streamName[TSDB_STREAM_FNAME_LEN]; +} SMStreamDoCheckpointMsg; + typedef struct { int64_t status; } SMVSubscribeRsp; diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index e80766d249fe4ea3f2134a5a5e7b270dd7522f2e..7833bdf1393c72b988b1553d691e7d8936b14603 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -172,6 +172,8 @@ enum { TD_DEF_MSG_TYPE(TDMT_MND_SERVER_VERSION, "server-version", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_UPTIME_TIMER, "uptime-timer", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_TMQ_LOST_CONSUMER_CLEAR, "lost-consumer-clear", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_MND_STREAM_CHECKPOINT_TIMER, "stream-checkpoint-tmr", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_MND_STREAM_BEGIN_CHECKPOINT, "stream-begin-checkpoint", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_MND_MAX_MSG, "mnd-max", NULL, NULL) TD_NEW_MSG_SEG(TDMT_VND_MSG) @@ -241,8 +243,11 @@ enum { TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_DISPATCH, "stream-task-dispatch", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_UNUSED1, "stream-unused1", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_RETRIEVE, "stream-retrieve", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_STREAM_RECOVER_FINISH, "vnode-stream-finish", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_CHECK, "vnode-stream-task-check", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_STREAM_RECOVER_FINISH, "stream-recover-finish", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_CHECK, "stream-task-check", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_CHECKPOINT, "stream-checkpoint", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_REPORT_CHECKPOINT, "stream-report-checkpoint", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_RESTORE_CHECKPOINT, "stream-restore-checkpoint", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_MAX_MSG, "stream-max", NULL, NULL) TD_NEW_MSG_SEG(TDMT_MON_MSG) @@ -282,6 +287,7 @@ enum { TD_DEF_MSG_TYPE(TDMT_VND_STREAM_TRIGGER, "vnode-stream-trigger", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_RECOVER_NONBLOCKING_STAGE, "vnode-stream-recover1", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_RECOVER_BLOCKING_STAGE, "vnode-stream-recover2", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_VND_STREAM_CHECK_POINT_SOURCE, "vnode-stream-checkpoint-source", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_STREAM_MAX_MSG, "vnd-stream-max", NULL, NULL) TD_NEW_MSG_SEG(TDMT_VND_TMQ_MSG) diff --git a/include/libs/executor/executor.h b/include/libs/executor/executor.h index d210004760b28084afb0c4bd5b34157fb18e001d..412b4b4cf6c950113a2f7e6d319692695cde0c07 100644 --- a/include/libs/executor/executor.h +++ b/include/libs/executor/executor.h @@ -152,7 +152,7 @@ void qCleanExecTaskBlockBuf(qTaskInfo_t tinfo); * @param tinfo qhandle * @return */ -int32_t qAsyncKillTask(qTaskInfo_t tinfo); +int32_t qAsyncKillTask(qTaskInfo_t tinfo, int32_t rspCode); /** * destroy query info structure diff --git a/include/libs/qcom/query.h b/include/libs/qcom/query.h index f51aa88485a9618365224744464befb3461e704a..63192812122025b10b85c59af343216df139d1c0 100644 --- a/include/libs/qcom/query.h +++ b/include/libs/qcom/query.h @@ -260,15 +260,16 @@ extern int32_t (*queryProcessMsgRsp[TDMT_MAX])(void* output, char* msg, int32_t (NEED_CLIENT_RM_TBLMETA_ERROR(_code) || NEED_CLIENT_REFRESH_VG_ERROR(_code) || \ NEED_CLIENT_REFRESH_TBLMETA_ERROR(_code)) -#define SYNC_UNKNOWN_LEADER_REDIRECT_ERROR(_code) ((_code) == TSDB_CODE_SYN_NOT_LEADER || (_code) == TSDB_CODE_SYN_INTERNAL_ERROR) +#define SYNC_UNKNOWN_LEADER_REDIRECT_ERROR(_code) ((_code) == TSDB_CODE_SYN_NOT_LEADER || (_code) == TSDB_CODE_SYN_INTERNAL_ERROR || (_code) == TSDB_CODE_VND_STOPPED) #define SYNC_SELF_LEADER_REDIRECT_ERROR(_code) ((_code) == TSDB_CODE_SYN_NOT_LEADER || (_code) == TSDB_CODE_SYN_INTERNAL_ERROR) #define SYNC_OTHER_LEADER_REDIRECT_ERROR(_code) (false) // used later -#define NEED_REDIRECT_ERROR(_code) \ - ((_code) == TSDB_CODE_RPC_REDIRECT || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || \ +#define NEED_REDIRECT_ERROR(_code) \ + ((_code) == TSDB_CODE_RPC_REDIRECT || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL || \ (_code) == TSDB_CODE_NODE_NOT_DEPLOYED || SYNC_UNKNOWN_LEADER_REDIRECT_ERROR(_code) || \ SYNC_SELF_LEADER_REDIRECT_ERROR(_code) || SYNC_OTHER_LEADER_REDIRECT_ERROR(_code) || \ - (_code) == TSDB_CODE_APP_NOT_READY || (_code) == TSDB_CODE_RPC_BROKEN_LINK) + (_code) == TSDB_CODE_APP_NOT_READY || (_code) == TSDB_CODE_RPC_BROKEN_LINK || \ + (_code) == TSDB_CODE_APP_IS_STARTING || (_code) == TSDB_CODE_APP_IS_STOPPING) #define NEED_CLIENT_RM_TBLMETA_REQ(_type) \ ((_type) == TDMT_VND_CREATE_TABLE || (_type) == TDMT_MND_CREATE_STB || (_type) == TDMT_VND_DROP_TABLE || \ diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 60415382b3e85dd38dc4c1191d26a73d935080ca..16cf96072426aee9cf1608accccd840ae56da7ac 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -275,31 +275,6 @@ typedef struct { SEpSet epSet; } SStreamChildEpInfo; -typedef struct { - int32_t srcNodeId; - int32_t srcChildId; - int64_t stateSaveVer; - int64_t stateProcessedVer; -} SStreamCheckpointInfo; - -typedef struct { - int64_t streamId; - int64_t checkTs; - int32_t checkpointId; // incremental - int32_t taskId; - SArray* checkpointVer; // SArray -} SStreamMultiVgCheckpointInfo; - -typedef struct { - int32_t taskId; - int32_t checkpointId; // incremental -} SStreamCheckpointKey; - -typedef struct { - int32_t taskId; - SArray* checkpointVer; -} SStreamRecoveringState; - typedef struct SStreamTask { int64_t streamId; int32_t taskId; @@ -364,6 +339,10 @@ typedef struct SStreamTask { int64_t checkReqId; SArray* checkReqIds; // shuffle int32_t refCnt; + + int64_t checkpointingId; + int32_t checkpointAlignCnt; + } SStreamTask; int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo); @@ -509,6 +488,60 @@ typedef struct { int32_t tEncodeSStreamRecoverFinishReq(SEncoder* pEncoder, const SStreamRecoverFinishReq* pReq); int32_t tDecodeSStreamRecoverFinishReq(SDecoder* pDecoder, SStreamRecoverFinishReq* pReq); +typedef struct { + int64_t streamId; + int64_t checkpointId; + int32_t taskId; + int32_t nodeId; + int64_t expireTime; +} SStreamCheckpointSourceReq; + +typedef struct { + int64_t streamId; + int64_t checkpointId; + int32_t taskId; + int32_t nodeId; + int64_t expireTime; +} SStreamCheckpointSourceRsp; + +int32_t tEncodeSStreamCheckpointSourceReq(SEncoder* pEncoder, const SStreamCheckpointSourceReq* pReq); +int32_t tDecodeSStreamCheckpointSourceReq(SDecoder* pDecoder, SStreamCheckpointSourceReq* pReq); + +int32_t tEncodeSStreamCheckpointSourceRsp(SEncoder* pEncoder, const SStreamCheckpointSourceRsp* pRsp); +int32_t tDecodeSStreamCheckpointSourceRsp(SDecoder* pDecoder, SStreamCheckpointSourceRsp* pRsp); + +typedef struct { + SMsgHead msgHead; + int64_t streamId; + int64_t checkpointId; + int32_t downstreamTaskId; + int32_t downstreamNodeId; + int32_t upstreamTaskId; + int32_t upstreamNodeId; + int32_t childId; + int64_t expireTime; + int8_t taskLevel; +} SStreamCheckpointReq; + +typedef struct { + SMsgHead msgHead; + int64_t streamId; + int64_t checkpointId; + int32_t downstreamTaskId; + int32_t downstreamNodeId; + int32_t upstreamTaskId; + int32_t upstreamNodeId; + int32_t childId; + int64_t expireTime; + int8_t taskLevel; +} SStreamCheckpointRsp; + +int32_t tEncodeSStreamCheckpointReq(SEncoder* pEncoder, const SStreamCheckpointReq* pReq); +int32_t tDecodeSStreamCheckpointReq(SDecoder* pDecoder, SStreamCheckpointReq* pReq); + +int32_t tEncodeSStreamCheckpointRsp(SEncoder* pEncoder, const SStreamCheckpointRsp* pRsp); +int32_t tDecodeSStreamCheckpointRsp(SDecoder* pDecoder, SStreamCheckpointRsp* pRsp); + typedef struct { int64_t streamId; int32_t downstreamTaskId; @@ -598,18 +631,22 @@ void streamMetaClose(SStreamMeta* streamMeta); int32_t streamMetaAddTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask); int32_t streamMetaAddSerializedTask(SStreamMeta* pMeta, int64_t startVer, char* msg, int32_t msgLen); -int32_t streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId); SStreamTask* streamMetaGetTask(SStreamMeta* pMeta, int32_t taskId); SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int32_t taskId); void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask); -void streamMetaRemoveTask1(SStreamMeta* pMeta, int32_t taskId); +void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId); int32_t streamMetaBegin(SStreamMeta* pMeta); int32_t streamMetaCommit(SStreamMeta* pMeta); int32_t streamMetaRollBack(SStreamMeta* pMeta); int32_t streamLoadTasks(SStreamMeta* pMeta); +// checkpoint +int32_t streamProcessCheckpointSourceReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointSourceReq* pReq); +int32_t streamProcessCheckpointReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointReq* pReq); +int32_t streamProcessCheckpointRsp(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointRsp* pRsp); + #ifdef __cplusplus } #endif diff --git a/include/util/taoserror.h b/include/util/taoserror.h index 25d37020cc65518e4e15e5b0b354ee691d5970ee..a6155f6611f6aa0acb57a56625f3d38c98bb9bc2 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -40,21 +40,37 @@ int32_t* taosGetErrno(); #define TSDB_CODE_FAILED -1 // unknown or needn't tell detail error // rpc +// #define TSDB_CODE_RPC_ACTION_IN_PROGRESS TAOS_DEF_ERROR_CODE(0, 0x0001) //2.x +// #define TSDB_CODE_RPC_AUTH_REQUIRED TAOS_DEF_ERROR_CODE(0, 0x0002) //2.x #define TSDB_CODE_RPC_AUTH_FAILURE TAOS_DEF_ERROR_CODE(0, 0x0003) #define TSDB_CODE_RPC_REDIRECT TAOS_DEF_ERROR_CODE(0, 0x0004) +// #define TSDB_CODE_RPC_NOT_READY TAOS_DEF_ERROR_CODE(0, 0x0005) //2.x +// #define TSDB_CODE_RPC_ALREADY_PROCESSED TAOS_DEF_ERROR_CODE(0, 0x0006) //2.x +// #define TSDB_CODE_RPC_LAST_SESSION_NOT_FINI. TAOS_DEF_ERROR_CODE(0, 0x0007) //2.x +// #define TSDB_CODE_RPC_MISMATCHED_LINK_ID TAOS_DEF_ERROR_CODE(0, 0x0008) //2.x +// #define TSDB_CODE_RPC_TOO_SLOW TAOS_DEF_ERROR_CODE(0, 0x0009) //2.x +// #define TSDB_CODE_RPC_MAX_SESSIONS TAOS_DEF_ERROR_CODE(0, 0x000A) //2.x #define TSDB_CODE_RPC_NETWORK_UNAVAIL TAOS_DEF_ERROR_CODE(0, 0x000B) +// #define TSDB_CODE_RPC_APP_ERROR TAOS_DEF_ERROR_CODE(0, 0x000C) //2.x +// #define TSDB_CODE_RPC_UNEXPECTED_RESPONSE TAOS_DEF_ERROR_CODE(0, 0x000D) //2.x +// #define TSDB_CODE_RPC_INVALID_VALUE TAOS_DEF_ERROR_CODE(0, 0x000E) //2.x +// #define TSDB_CODE_RPC_INVALID_TRAN_ID TAOS_DEF_ERROR_CODE(0, 0x000F) //2.x +// #define TSDB_CODE_RPC_INVALID_SESSION_ID TAOS_DEF_ERROR_CODE(0, 0x0010) //2.x +// #define TSDB_CODE_RPC_INVALID_MSG_TYPE TAOS_DEF_ERROR_CODE(0, 0x0011) //2.x +// #define TSDB_CODE_RPC_INVALID_RESPONSE_TYPE TAOS_DEF_ERROR_CODE(0, 0x0012) //2.x +#define TSDB_CODE_TIME_UNSYNCED TAOS_DEF_ERROR_CODE(0, 0x0013) +#define TSDB_CODE_APP_NOT_READY TAOS_DEF_ERROR_CODE(0, 0x0014) #define TSDB_CODE_RPC_FQDN_ERROR TAOS_DEF_ERROR_CODE(0, 0x0015) +// #define TSDB_CODE_RPC_INVALID_VERSION TAOS_DEF_ERROR_CODE(0, 0x0016) //2.x #define TSDB_CODE_RPC_PORT_EADDRINUSE TAOS_DEF_ERROR_CODE(0, 0x0017) #define TSDB_CODE_RPC_BROKEN_LINK TAOS_DEF_ERROR_CODE(0, 0x0018) #define TSDB_CODE_RPC_TIMEOUT TAOS_DEF_ERROR_CODE(0, 0x0019) //common & util -#define TSDB_CODE_TIME_UNSYNCED TAOS_DEF_ERROR_CODE(0, 0x0013) -#define TSDB_CODE_APP_NOT_READY TAOS_DEF_ERROR_CODE(0, 0x0014) - #define TSDB_CODE_OPS_NOT_SUPPORT TAOS_DEF_ERROR_CODE(0, 0x0100) #define TSDB_CODE_MEMORY_CORRUPTED TAOS_DEF_ERROR_CODE(0, 0x0101) #define TSDB_CODE_OUT_OF_MEMORY TAOS_DEF_ERROR_CODE(0, 0x0102) +// #define TSDB_CODE_COM_INVALID_CFG_MSG TAOS_DEF_ERROR_CODE(0, 0x0103) // 2.x #define TSDB_CODE_FILE_CORRUPTED TAOS_DEF_ERROR_CODE(0, 0x0104) #define TSDB_CODE_REF_NO_MEMORY TAOS_DEF_ERROR_CODE(0, 0x0105) #define TSDB_CODE_REF_FULL TAOS_DEF_ERROR_CODE(0, 0x0106) @@ -69,7 +85,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_OUT_OF_SHM_MEM TAOS_DEF_ERROR_CODE(0, 0x0113) #define TSDB_CODE_INVALID_SHM_ID TAOS_DEF_ERROR_CODE(0, 0x0114) #define TSDB_CODE_INVALID_MSG TAOS_DEF_ERROR_CODE(0, 0x0115) -#define TSDB_CODE_INVALID_MSG_LEN TAOS_DEF_ERROR_CODE(0, 0x0116) +#define TSDB_CODE_INVALID_MSG_LEN TAOS_DEF_ERROR_CODE(0, 0x0116) // #define TSDB_CODE_INVALID_PTR TAOS_DEF_ERROR_CODE(0, 0x0117) #define TSDB_CODE_INVALID_PARA TAOS_DEF_ERROR_CODE(0, 0x0118) #define TSDB_CODE_INVALID_CFG TAOS_DEF_ERROR_CODE(0, 0x0119) @@ -81,7 +97,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_CHECKSUM_ERROR TAOS_DEF_ERROR_CODE(0, 0x011F) #define TSDB_CODE_COMPRESS_ERROR TAOS_DEF_ERROR_CODE(0, 0x0120) -#define TSDB_CODE_MSG_NOT_PROCESSED TAOS_DEF_ERROR_CODE(0, 0x0121) +#define TSDB_CODE_MSG_NOT_PROCESSED TAOS_DEF_ERROR_CODE(0, 0x0121) // #define TSDB_CODE_CFG_NOT_FOUND TAOS_DEF_ERROR_CODE(0, 0x0122) #define TSDB_CODE_REPEAT_INIT TAOS_DEF_ERROR_CODE(0, 0x0123) #define TSDB_CODE_DUP_KEY TAOS_DEF_ERROR_CODE(0, 0x0124) @@ -94,6 +110,9 @@ int32_t* taosGetErrno(); #define TSDB_CODE_NO_DISKSPACE TAOS_DEF_ERROR_CODE(0, 0x012B) #define TSDB_CODE_TIMEOUT_ERROR TAOS_DEF_ERROR_CODE(0, 0x012C) +#define TSDB_CODE_APP_IS_STARTING TAOS_DEF_ERROR_CODE(0, 0x0130) // +#define TSDB_CODE_APP_IS_STOPPING TAOS_DEF_ERROR_CODE(0, 0x0131) // + //client #define TSDB_CODE_TSC_INVALID_OPERATION TAOS_DEF_ERROR_CODE(0, 0x0200) #define TSDB_CODE_TSC_INVALID_QHANDLE TAOS_DEF_ERROR_CODE(0, 0x0201) @@ -324,6 +343,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_VND_COL_NOT_EXISTS TAOS_DEF_ERROR_CODE(0, 0x0526) #define TSDB_CODE_VND_COL_SUBSCRIBED TAOS_DEF_ERROR_CODE(0, 0x0527) #define TSDB_CODE_VND_NO_AVAIL_BUFPOOL TAOS_DEF_ERROR_CODE(0, 0x0528) +#define TSDB_CODE_VND_STOPPED TAOS_DEF_ERROR_CODE(0, 0x0529) // tsdb #define TSDB_CODE_TDB_INVALID_TABLE_ID TAOS_DEF_ERROR_CODE(0, 0x0600) diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index ab46ba24cffbfbe6e04989cc892c1bb40c856360..2e8cb5e5f739110ce75c94bea22aa820862f97d0 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -167,6 +167,7 @@ int64_t tsWalFsyncDataSizeLimit = (100 * 1024 * 1024L); // internal int32_t tsTransPullupInterval = 2; int32_t tsMqRebalanceInterval = 2; +int32_t tsStreamCheckpointTickInterval = 1; int32_t tsTtlUnit = 86400; int32_t tsTtlPushInterval = 86400; int32_t tsGrantHBInterval = 60; diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index 3ea434a44991114e964e798a612e9059e5a37dcd..5b16a55ae3906171788e319014f597203eccd1a8 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -3748,6 +3748,31 @@ int32_t tDeserializeSMTimerMsg(void *buf, int32_t bufLen, SMTimerReq *pReq) { return 0; } +int32_t tSerializeSMStreamTickMsg(void *buf, int32_t bufLen, SMStreamTickReq *pReq) { + SEncoder encoder = {0}; + tEncoderInit(&encoder, buf, bufLen); + + if (tStartEncode(&encoder) < 0) return -1; + if (tEncodeI64(&encoder, pReq->tick) < 0) return -1; + tEndEncode(&encoder); + + int32_t tlen = encoder.pos; + tEncoderClear(&encoder); + return tlen; +} + +int32_t tDeserializeSMStreamTickMsg(void *buf, int32_t bufLen, SMStreamTickReq *pReq) { + SDecoder decoder = {0}; + tDecoderInit(&decoder, buf, bufLen); + + if (tStartDecode(&decoder) < 0) return -1; + if (tDecodeI64(&decoder, &pReq->tick) < 0) return -1; + tEndDecode(&decoder); + + tDecoderClear(&decoder); + return 0; +} + int32_t tEncodeSReplica(SEncoder *pEncoder, SReplica *pReplica) { if (tEncodeI32(pEncoder, pReplica->id) < 0) return -1; if (tEncodeU16(pEncoder, pReplica->port) < 0) return -1; diff --git a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c index 07a612bb351e1fe6ed1dc93cb993ebdea42310dc..30ef7b9542be6a3365dcd49284d96f4edbb26016 100644 --- a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c +++ b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c @@ -144,6 +144,7 @@ static void dmProcessMgmtQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) { break; default: terrno = TSDB_CODE_MSG_NOT_PROCESSED; + dGError("msg:%p, not processed in mgmt queue", pMsg); break; } diff --git a/source/dnode/mgmt/node_mgmt/src/dmTransport.c b/source/dnode/mgmt/node_mgmt/src/dmTransport.c index 95656fd76c510863c20852c694b0a6f09386c72f..d037cc33752fd1255e1e6dba967e9214a3cea664 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmTransport.c +++ b/source/dnode/mgmt/node_mgmt/src/dmTransport.c @@ -51,13 +51,15 @@ static inline void dmSendRedirectRsp(SRpcMsg *pMsg, const SEpSet *pNewEpSet) { } int32_t dmProcessNodeMsg(SMgmtWrapper *pWrapper, SRpcMsg *pMsg) { + const STraceId *trace = &pMsg->info.traceId; + NodeMsgFp msgFp = pWrapper->msgFps[TMSG_INDEX(pMsg->msgType)]; if (msgFp == NULL) { terrno = TSDB_CODE_MSG_NOT_PROCESSED; + dGError("msg:%p, not processed since no handler", pMsg); return -1; } - const STraceId *trace = &pMsg->info.traceId; dGTrace("msg:%p, will be processed by %s", pMsg, pWrapper->name); pMsg->info.wrapper = pWrapper; return (*msgFp)(pWrapper->pMgmt, pMsg); @@ -99,18 +101,23 @@ static void dmProcessRpcMsg(SDnode *pDnode, SRpcMsg *pRpc, SEpSet *pEpSet) { dmProcessServerStartupStatus(pDnode, pRpc); return; } else { - terrno = TSDB_CODE_APP_NOT_READY; + if (pDnode->status == DND_STAT_INIT) { + terrno = TSDB_CODE_APP_IS_STARTING; + } else { + terrno = TSDB_CODE_APP_IS_STOPPING; + } goto _OVER; } } - if (IsReq(pRpc) && pRpc->pCont == NULL) { + if (pRpc->pCont == NULL && (IsReq(pRpc) || pRpc->contLen != 0)) { dGError("msg:%p, type:%s pCont is NULL", pRpc, TMSG_INFO(pRpc->msgType)); terrno = TSDB_CODE_INVALID_MSG_LEN; goto _OVER; } if (pHandle->defaultNtype == NODE_END) { + dGError("msg:%p, type:%s not processed since no handle", pRpc, TMSG_INFO(pRpc->msgType)); terrno = TSDB_CODE_MSG_NOT_PROCESSED; goto _OVER; } @@ -234,7 +241,8 @@ static inline void dmReleaseHandle(SRpcHandleInfo *pHandle, int8_t type) { rpcRe static bool rpcRfp(int32_t code, tmsg_t msgType) { if (code == TSDB_CODE_RPC_REDIRECT || code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_NODE_NOT_DEPLOYED || - code == TSDB_CODE_SYN_NOT_LEADER || code == TSDB_CODE_APP_NOT_READY || code == TSDB_CODE_RPC_BROKEN_LINK) { + code == TSDB_CODE_SYN_NOT_LEADER || code == TSDB_CODE_APP_NOT_READY || code == TSDB_CODE_RPC_BROKEN_LINK || + code == TSDB_CODE_VND_STOPPED) { if (msgType == TDMT_SCH_QUERY || msgType == TDMT_SCH_MERGE_QUERY || msgType == TDMT_SCH_FETCH || msgType == TDMT_SCH_MERGE_FETCH) { return false; diff --git a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h index 7ecf60dc2d0de1b0ff9f9e32d64a7f3ff68d2cf8..9961828747f290cf501ceb2c51ee744b4a31349c 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -640,10 +640,14 @@ typedef struct { SArray* tasks; // SArray> SSchemaWrapper outputSchema; SSchemaWrapper tagSchema; + + // 3.0.20 + int64_t checkpointFreq; // ms + int64_t currentTick; // do not serialize } SStreamObj; int32_t tEncodeSStreamObj(SEncoder* pEncoder, const SStreamObj* pObj); -int32_t tDecodeSStreamObj(SDecoder* pDecoder, SStreamObj* pObj); +int32_t tDecodeSStreamObj(SDecoder* pDecoder, SStreamObj* pObj, int32_t sver); void tFreeStreamObj(SStreamObj* pObj); typedef struct { @@ -653,15 +657,6 @@ typedef struct { SArray* childInfo; // SArray } SStreamCheckpointObj; -#if 0 -typedef struct { - int64_t uid; - int64_t streamId; - int8_t status; - int8_t stage; -} SStreamRecoverObj; -#endif - #ifdef __cplusplus } #endif diff --git a/source/dnode/mnode/impl/src/mndDef.c b/source/dnode/mnode/impl/src/mndDef.c index 2e984212a1a905e722d81e2e2c7c325760812c47..b5c2fb05b3adb3f3e7d0c3ce19f3da055fabdb3c 100644 --- a/source/dnode/mnode/impl/src/mndDef.c +++ b/source/dnode/mnode/impl/src/mndDef.c @@ -76,11 +76,14 @@ int32_t tEncodeSStreamObj(SEncoder *pEncoder, const SStreamObj *pObj) { if (tEncodeSSchemaWrapper(pEncoder, &pObj->outputSchema) < 0) return -1; + // 3.0.20 + if (tEncodeI64(pEncoder, pObj->checkpointFreq) < 0) return -1; + tEndEncode(pEncoder); return pEncoder->pos; } -int32_t tDecodeSStreamObj(SDecoder *pDecoder, SStreamObj *pObj) { +int32_t tDecodeSStreamObj(SDecoder *pDecoder, SStreamObj *pObj, int32_t sver) { if (tStartDecode(pDecoder) < 0) return -1; if (tDecodeCStrTo(pDecoder, pObj->name) < 0) return -1; @@ -139,6 +142,10 @@ int32_t tDecodeSStreamObj(SDecoder *pDecoder, SStreamObj *pObj) { if (tDecodeSSchemaWrapper(pDecoder, &pObj->outputSchema) < 0) return -1; + // 3.0.20 + if (sver >= 2) { + if (tDecodeI64(pDecoder, &pObj->checkpointFreq) < 0) return -1; + } tEndDecode(pDecoder); return 0; } diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index e6aee3481f0335057253b5420493c6520c577162..d46ae7aaa985ddc1a51c12eea72bb411477bb7f5 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -85,6 +85,21 @@ static void *mndBuildTimerMsg(int32_t *pContLen) { return pReq; } +static void *mndBuildCheckpointTickMsg(int32_t *pContLen, int64_t sec) { + SMStreamTickReq timerReq = { + .tick = sec, + }; + + int32_t contLen = tSerializeSMStreamTickMsg(NULL, 0, &timerReq); + if (contLen <= 0) return NULL; + void *pReq = rpcMallocCont(contLen); + if (pReq == NULL) return NULL; + + tSerializeSMStreamTickMsg(pReq, contLen, &timerReq); + *pContLen = contLen; + return pReq; +} + static void mndPullupTrans(SMnode *pMnode) { int32_t contLen = 0; void *pReq = mndBuildTimerMsg(&contLen); @@ -105,7 +120,24 @@ static void mndCalMqRebalance(SMnode *pMnode) { int32_t contLen = 0; void *pReq = mndBuildTimerMsg(&contLen); if (pReq != NULL) { - SRpcMsg rpcMsg = {.msgType = TDMT_MND_TMQ_TIMER, .pCont = pReq, .contLen = contLen}; + SRpcMsg rpcMsg = { + .msgType = TDMT_MND_TMQ_TIMER, + .pCont = pReq, + .contLen = contLen, + }; + tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg); + } +} + +static void mndStreamCheckpointTick(SMnode *pMnode, int64_t sec) { + int32_t contLen = 0; + void *pReq = mndBuildCheckpointTickMsg(&contLen, sec); + if (pReq != NULL) { + SRpcMsg rpcMsg = { + .msgType = TDMT_MND_STREAM_CHECKPOINT_TIMER, + .pCont = pReq, + .contLen = contLen, + }; tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg); } } @@ -224,6 +256,12 @@ static void *mndThreadFp(void *param) { mndCalMqRebalance(pMnode); } +#if 0 + if (sec % tsStreamCheckpointTickInterval == 0) { + mndStreamCheckpointTick(pMnode, sec); + } +#endif + if (sec % tsTelemInterval == (TMIN(60, (tsTelemInterval - 1)))) { mndPullupTelem(pMnode); } @@ -606,17 +644,6 @@ static int32_t mndCheckMnodeState(SRpcMsg *pMsg) { return -1; } -static int32_t mndCheckMsgContent(SRpcMsg *pMsg) { - if (!IsReq(pMsg)) return 0; - if (pMsg->contLen != 0 && pMsg->pCont != NULL) return 0; - - const STraceId *trace = &pMsg->info.traceId; - mGError("msg:%p, failed to check msg, cont:%p contLen:%d, app:%p type:%s", pMsg, pMsg->pCont, pMsg->contLen, - pMsg->info.ahandle, TMSG_INFO(pMsg->msgType)); - terrno = TSDB_CODE_INVALID_MSG_LEN; - return -1; -} - int32_t mndProcessRpcMsg(SRpcMsg *pMsg) { SMnode *pMnode = pMsg->info.node; const STraceId *trace = &pMsg->info.traceId; @@ -628,7 +655,6 @@ int32_t mndProcessRpcMsg(SRpcMsg *pMsg) { return -1; } - if (mndCheckMsgContent(pMsg) != 0) return -1; if (mndCheckMnodeState(pMsg) != 0) return -1; mGTrace("msg:%p, start to process in mnode, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType)); diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 44ff8733fd326e3c28fa1481cbe6f95b06d2c8af..7ee688d220931e3a0bf70e93b99a000e0852b5e1 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -28,7 +28,7 @@ #include "parser.h" #include "tname.h" -#define MND_STREAM_VER_NUMBER 1 +#define MND_STREAM_VER_NUMBER 2 #define MND_STREAM_RESERVE_SIZE 64 static int32_t mndStreamActionInsert(SSdb *pSdb, SStreamObj *pStream); @@ -36,6 +36,8 @@ static int32_t mndStreamActionDelete(SSdb *pSdb, SStreamObj *pStream); static int32_t mndStreamActionUpdate(SSdb *pSdb, SStreamObj *pStream, SStreamObj *pNewStream); static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq); static int32_t mndProcessDropStreamReq(SRpcMsg *pReq); +static int32_t mndProcessStreamCheckpointTmr(SRpcMsg *pReq); +static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq); /*static int32_t mndProcessRecoverStreamReq(SRpcMsg *pReq);*/ static int32_t mndProcessStreamMetaReq(SRpcMsg *pReq); static int32_t mndGetStreamMeta(SRpcMsg *pReq, SShowObj *pShow, STableMetaRsp *pMeta); @@ -62,6 +64,10 @@ int32_t mndInitStream(SMnode *pMnode) { mndSetMsgHandle(pMnode, TDMT_STREAM_TASK_DEPLOY_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_STREAM_TASK_DROP_RSP, mndTransProcessRsp); + mndSetMsgHandle(pMnode, TDMT_MND_STREAM_CHECKPOINT_TIMER, mndProcessStreamCheckpointTmr); + mndSetMsgHandle(pMnode, TDMT_MND_STREAM_BEGIN_CHECKPOINT, mndProcessStreamDoCheckpoint); + mndSetMsgHandle(pMnode, TDMT_STREAM_TASK_REPORT_CHECKPOINT, mndTransProcessRsp); + mndAddShowRetrieveHandle(pMnode, TSDB_MGMT_TABLE_STREAMS, mndRetrieveStream); mndAddShowFreeIterHandle(pMnode, TSDB_MGMT_TABLE_STREAMS, mndCancelGetNextStream); mndAddShowRetrieveHandle(pMnode, TSDB_MGMT_TABLE_STREAM_TASKS, mndRetrieveStreamTask); @@ -127,7 +133,7 @@ SSdbRow *mndStreamActionDecode(SSdbRaw *pRaw) { int8_t sver = 0; if (sdbGetRawSoftVer(pRaw, &sver) != 0) goto STREAM_DECODE_OVER; - if (sver != MND_STREAM_VER_NUMBER) { + if (sver != 1 && sver != 2) { terrno = TSDB_CODE_SDB_INVALID_DATA_VER; goto STREAM_DECODE_OVER; } @@ -147,7 +153,7 @@ SSdbRow *mndStreamActionDecode(SSdbRaw *pRaw) { SDecoder decoder; tDecoderInit(&decoder, buf, tlen + 1); - if (tDecodeSStreamObj(&decoder, pStream) < 0) { + if (tDecodeSStreamObj(&decoder, pStream, sver) < 0) { tDecoderClear(&decoder); goto STREAM_DECODE_OVER; } @@ -680,93 +686,183 @@ _OVER: tFreeStreamObj(&streamObj); return code; } - -static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { +static int32_t mndProcessStreamCheckpointTmr(SRpcMsg *pReq) { SMnode *pMnode = pReq->info.node; + SSdb *pSdb = pMnode->pSdb; + void *pIter = NULL; SStreamObj *pStream = NULL; - /*SDbObj *pDb = NULL;*/ - /*SUserObj *pUser = NULL;*/ - SMDropStreamReq dropReq = {0}; - if (tDeserializeSMDropStreamReq(pReq->pCont, pReq->contLen, &dropReq) < 0) { - ASSERT(0); - terrno = TSDB_CODE_INVALID_MSG; - return -1; + // iterate all stream obj + while (1) { + pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); + if (pIter == NULL) break; + // incr tick + int64_t currentTick = atomic_add_fetch_64(&pStream->currentTick, 1); + // if >= checkpointFreq, build msg TDMT_MND_STREAM_BEGIN_CHECKPOINT, put into write q + if (currentTick >= pStream->checkpointFreq) { + atomic_store_64(&pStream->currentTick, 0); + SMStreamDoCheckpointMsg *pMsg = rpcMallocCont(sizeof(SMStreamDoCheckpointMsg)); + + pMsg->streamId = pStream->uid; + pMsg->checkpointId = tGenIdPI64(); + memcpy(pMsg->streamName, pStream->name, TSDB_STREAM_FNAME_LEN); + + SRpcMsg rpcMsg = { + .msgType = TDMT_MND_STREAM_BEGIN_CHECKPOINT, + .pCont = pMsg, + .contLen = sizeof(SMStreamDoCheckpointMsg), + }; + + tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); + } } - pStream = mndAcquireStream(pMnode, dropReq.name); + return 0; +} - if (pStream == NULL) { - if (dropReq.igNotExists) { - mInfo("stream:%s, not exist, ignore not exist is set", dropReq.name); - sdbRelease(pMnode->pSdb, pStream); - return 0; - } else { - terrno = TSDB_CODE_MND_STREAM_NOT_EXIST; - return -1; - } - } +static int32_t mndBuildStreamCheckpointSourceReq(void **pBuf, int32_t *pLen, const SStreamTask *pTask, + SMStreamDoCheckpointMsg *pMsg) { + SStreamCheckpointSourceReq req = {0}; + req.checkpointId = pMsg->checkpointId; + req.nodeId = pTask->nodeId; + req.expireTime = -1; + req.streamId = pTask->streamId; + req.taskId = pTask->taskId; - if (mndCheckDbPrivilegeByName(pMnode, pReq->info.conn.user, MND_OPER_WRITE_DB, pStream->targetDb) != 0) { + int32_t code; + int32_t blen; + + tEncodeSize(tEncodeSStreamCheckpointSourceReq, &req, blen, code); + if (code < 0) { + terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, pReq, "drop-stream"); - mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); - if (pTrans == NULL) { - mError("stream:%s, failed to drop since %s", dropReq.name, terrstr()); - sdbRelease(pMnode->pSdb, pStream); + int32_t tlen = sizeof(SMsgHead) + blen; + + void *buf = taosMemoryMalloc(tlen); + if (buf == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } - mInfo("trans:%d, used to drop stream:%s", pTrans->id, dropReq.name); - // drop all tasks - if (mndDropStreamTasks(pMnode, pTrans, pStream) < 0) { - mError("stream:%s, failed to drop task since %s", dropReq.name, terrstr()); - sdbRelease(pMnode->pSdb, pStream); - mndTransDrop(pTrans); + void *abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); + SEncoder encoder; + tEncoderInit(&encoder, abuf, tlen); + tEncodeSStreamCheckpointSourceReq(&encoder, &req); + + SMsgHead *pMsgHead = (SMsgHead *)buf; + pMsgHead->contLen = htonl(tlen); + pMsgHead->vgId = htonl(pTask->nodeId); + + tEncoderClear(&encoder); + + *pBuf = buf; + *pLen = tlen; + + return 0; +} + +static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { + SMnode *pMnode = pReq->info.node; + SSdb *pSdb = pMnode->pSdb; + + SMStreamDoCheckpointMsg *pMsg = (SMStreamDoCheckpointMsg *)pReq->pCont; + + SStreamObj *pStream = mndAcquireStream(pMnode, pMsg->streamName); + + if (pStream == NULL || pStream->uid != pMsg->streamId) { + mError("start checkpointing failed since stream %s not found", pMsg->streamName); return -1; } - // drop stream - if (mndPersistDropStreamLog(pMnode, pTrans, pStream) < 0) { - sdbRelease(pMnode->pSdb, pStream); - mndTransDrop(pTrans); - return -1; + // build new transaction: + STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, pReq, "stream-checkpoint"); + if (pTrans == NULL) return -1; + mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); + taosRLockLatch(&pStream->lock); + // 1. redo action: broadcast checkpoint source msg for all source vg + int32_t totLevel = taosArrayGetSize(pStream->tasks); + for (int32_t i = 0; i < totLevel; i++) { + SArray *pLevel = taosArrayGetP(pStream->tasks, i); + SStreamTask *pTask = taosArrayGetP(pLevel, 0); + if (pTask->taskLevel == TASK_LEVEL__SOURCE) { + int32_t sz = taosArrayGetSize(pLevel); + for (int32_t j = 0; j < sz; j++) { + SStreamTask *pTask = taosArrayGetP(pLevel, j); + ASSERT(pTask->nodeId > 0); + SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->nodeId); + if (pVgObj == NULL) { + ASSERT(0); + taosRUnLockLatch(&pStream->lock); + mndReleaseStream(pMnode, pStream); + mndTransDrop(pTrans); + return -1; + } + + void *buf; + int32_t tlen; + if (mndBuildStreamCheckpointSourceReq(&buf, &tlen, pTask, pMsg) < 0) { + taosRUnLockLatch(&pStream->lock); + mndReleaseStream(pMnode, pStream); + mndTransDrop(pTrans); + return -1; + } + + STransAction action = {0}; + action.epSet = mndGetVgroupEpset(pMnode, pVgObj); + action.pCont = buf; + action.contLen = tlen; + action.msgType = TDMT_VND_STREAM_CHECK_POINT_SOURCE; + + mndReleaseVgroup(pMnode, pVgObj); + + if (mndTransAppendRedoAction(pTrans, &action) != 0) { + taosMemoryFree(buf); + taosRUnLockLatch(&pStream->lock); + mndReleaseStream(pMnode, pStream); + mndTransDrop(pTrans); + return -1; + } + } + } } + // 2. reset tick + atomic_store_64(&pStream->currentTick, 0); + // 3. commit log: stream checkpoint info + taosRUnLockLatch(&pStream->lock); if (mndTransPrepare(pMnode, pTrans) != 0) { - mError("trans:%d, failed to prepare drop stream trans since %s", pTrans->id, terrstr()); - sdbRelease(pMnode->pSdb, pStream); + mError("failed to prepare trans rebalance since %s", terrstr()); mndTransDrop(pTrans); + mndReleaseStream(pMnode, pStream); return -1; } - sdbRelease(pMnode->pSdb, pStream); + mndReleaseStream(pMnode, pStream); mndTransDrop(pTrans); - return TSDB_CODE_ACTION_IN_PROGRESS; + return 0; } -#if 0 -static int32_t mndProcessRecoverStreamReq(SRpcMsg *pReq) { +static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { SMnode *pMnode = pReq->info.node; SStreamObj *pStream = NULL; /*SDbObj *pDb = NULL;*/ /*SUserObj *pUser = NULL;*/ - SMRecoverStreamReq recoverReq = {0}; - if (tDeserializeSMRecoverStreamReq(pReq->pCont, pReq->contLen, &recoverReq) < 0) { + SMDropStreamReq dropReq = {0}; + if (tDeserializeSMDropStreamReq(pReq->pCont, pReq->contLen, &dropReq) < 0) { ASSERT(0); terrno = TSDB_CODE_INVALID_MSG; return -1; } - pStream = mndAcquireStream(pMnode, recoverReq.name); + pStream = mndAcquireStream(pMnode, dropReq.name); if (pStream == NULL) { - if (recoverReq.igNotExists) { - mInfo("stream:%s, not exist, ignore not exist is set", recoverReq.name); + if (dropReq.igNotExists) { + mInfo("stream:%s, not exist, ignore not exist is set", dropReq.name); sdbRelease(pMnode->pSdb, pStream); return 0; } else { @@ -779,39 +875,42 @@ static int32_t mndProcessRecoverStreamReq(SRpcMsg *pReq) { return -1; } - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_NOTHING, pReq); + STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, pReq, "drop-stream"); + mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); if (pTrans == NULL) { - mError("stream:%s, failed to recover since %s", recoverReq.name, terrstr()); + mError("stream:%s, failed to drop since %s", dropReq.name, terrstr()); sdbRelease(pMnode->pSdb, pStream); return -1; } - mInfo("trans:%d, used to drop stream:%s", pTrans->id, recoverReq.name); + mInfo("trans:%d, used to drop stream:%s", pTrans->id, dropReq.name); - // broadcast to recover all tasks - if (mndRecoverStreamTasks(pMnode, pTrans, pStream) < 0) { - mError("stream:%s, failed to recover task since %s", recoverReq.name, terrstr()); + // drop all tasks + if (mndDropStreamTasks(pMnode, pTrans, pStream) < 0) { + mError("stream:%s, failed to drop task since %s", dropReq.name, terrstr()); sdbRelease(pMnode->pSdb, pStream); + mndTransDrop(pTrans); return -1; } - // update stream status - if (mndSetStreamRecover(pMnode, pTrans, pStream) < 0) { + // drop stream + if (mndPersistDropStreamLog(pMnode, pTrans, pStream) < 0) { sdbRelease(pMnode->pSdb, pStream); + mndTransDrop(pTrans); return -1; } if (mndTransPrepare(pMnode, pTrans) != 0) { - mError("trans:%d, failed to prepare recover stream trans since %s", pTrans->id, terrstr()); + mError("trans:%d, failed to prepare drop stream trans since %s", pTrans->id, terrstr()); sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); return -1; } sdbRelease(pMnode->pSdb, pStream); + mndTransDrop(pTrans); return TSDB_CODE_ACTION_IN_PROGRESS; } -#endif int32_t mndDropStreamByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb) { SSdb *pSdb = pMnode->pSdb; @@ -847,13 +946,6 @@ int32_t mndDropStreamByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb) { } } -#if 0 - if (mndSetDropOffsetStreamLogs(pMnode, pTrans, pStream) < 0) { - sdbRelease(pSdb, pStream); - goto END; - } -#endif - sdbRelease(pSdb, pStream); } diff --git a/source/dnode/mnode/impl/src/mndSubscribe.c b/source/dnode/mnode/impl/src/mndSubscribe.c index 3cf5e17cd3606b46f50667e15a27a8ade843a867..55e073a8a4a4e10df6410d5e77dd975b1bde3535 100644 --- a/source/dnode/mnode/impl/src/mndSubscribe.c +++ b/source/dnode/mnode/impl/src/mndSubscribe.c @@ -440,9 +440,9 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR } static int32_t mndPersistRebResult(SMnode *pMnode, SRpcMsg *pMsg, const SMqRebOutputObj *pOutput) { - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_DB_INSIDE, pMsg, "persist-reb"); - mndTransSetDbName(pTrans, pOutput->pSub->dbName, NULL); + STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_DB_INSIDE, pMsg, "tmq-reb"); if (pTrans == NULL) return -1; + mndTransSetDbName(pTrans, pOutput->pSub->dbName, NULL); // make txn: // 1. redo action: action to all vg @@ -523,28 +523,6 @@ static int32_t mndPersistRebResult(SMnode *pMnode, SRpcMsg *pMsg, const SMqRebOu tDeleteSMqConsumerObj(pConsumerNew); taosMemoryFree(pConsumerNew); } -#if 0 - if (consumerNum) { - char topic[TSDB_TOPIC_FNAME_LEN]; - char cgroup[TSDB_CGROUP_LEN]; - mndSplitSubscribeKey(pOutput->pSub->key, topic, cgroup, true); - SMqTopicObj *pTopic = mndAcquireTopic(pMnode, topic); - if (pTopic) { - // TODO make topic complete - SMqTopicObj topicObj = {0}; - memcpy(&topicObj, pTopic, sizeof(SMqTopicObj)); - topicObj.refConsumerCnt = pTopic->refConsumerCnt - consumerNum; - // TODO is that correct? - pTopic->refConsumerCnt = topicObj.refConsumerCnt; - mInfo("subscribe topic %s unref %d consumer cgroup %s, refcnt %d", pTopic->name, consumerNum, cgroup, - topicObj.refConsumerCnt); - if (mndSetTopicCommitLogs(pMnode, pTrans, &topicObj) != 0) { - ASSERT(0); - goto REB_FAIL; - } - } - } -#endif // 4. TODO commit log: modification log diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index aa55204ae5ed8c511684ca10b5eeb2067be5bad5..b133226ed39bb5c20ed96b56d95881d009e1e872 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -168,7 +168,7 @@ int32_t sndProcessTaskDeployReq(SSnode *pSnode, char *msg, int32_t msgLen) { int32_t sndProcessTaskDropReq(SSnode *pSnode, char *msg, int32_t msgLen) { SVDropStreamTaskReq *pReq = (SVDropStreamTaskReq *)msg; - streamMetaRemoveTask1(pSnode->pMeta, pReq->taskId); + streamMetaRemoveTask(pSnode->pMeta, pReq->taskId); return 0; } diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 3d9ebec4c901612938d09a148244c8a27b395d7f..003b387838ac5bd1d913c49c18eafc447e835ebc 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1425,7 +1425,7 @@ int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg) { int32_t tqProcessTaskDropReq(STQ* pTq, int64_t version, char* msg, int32_t msgLen) { SVDropStreamTaskReq* pReq = (SVDropStreamTaskReq*)msg; - streamMetaRemoveTask1(pTq->pStreamMeta, pReq->taskId); + streamMetaRemoveTask(pTq->pStreamMeta, pReq->taskId); return 0; } diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index 7100be58e36baf851bc16b03f195e255a7fdc5e0..3dbcd8a07e68a98e0255049c22fb2b7925d46c46 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -781,7 +781,7 @@ void setInputDataBlock(SExprSupp* pExprSupp, SSDataBlock* pBlock, int32_t order, int32_t checkForQueryBuf(size_t numOfTables); bool isTaskKilled(SExecTaskInfo* pTaskInfo); -void setTaskKilled(SExecTaskInfo* pTaskInfo); +void setTaskKilled(SExecTaskInfo* pTaskInfo, int32_t rspCode); void doDestroyTask(SExecTaskInfo* pTaskInfo); void setTaskStatus(SExecTaskInfo* pTaskInfo, int8_t status); diff --git a/source/libs/executor/src/exchangeoperator.c b/source/libs/executor/src/exchangeoperator.c index 963a2732908c91ca65d354ba041c058cd050f29d..53660d88e1bd819e0adbd5f3fe556f3eca7b63f7 100644 --- a/source/libs/executor/src/exchangeoperator.c +++ b/source/libs/executor/src/exchangeoperator.c @@ -70,7 +70,7 @@ static void concurrentlyLoadRemoteDataImpl(SOperatorInfo* pOperator, SExchangeIn tsem_wait(&pExchangeInfo->ready); if (isTaskKilled(pTaskInfo)) { - longjmp(pTaskInfo->env, TSDB_CODE_TSC_QUERY_CANCELLED); + longjmp(pTaskInfo->env, pTaskInfo->code); } for (int32_t i = 0; i < totalSources; ++i) { @@ -573,7 +573,7 @@ int32_t prepareConcurrentlyLoad(SOperatorInfo* pOperator) { tsem_wait(&pExchangeInfo->ready); if (isTaskKilled(pTaskInfo)) { - longjmp(pTaskInfo->env, TSDB_CODE_TSC_QUERY_CANCELLED); + longjmp(pTaskInfo->env, pTaskInfo->code); } tsem_post(&pExchangeInfo->ready); @@ -621,7 +621,7 @@ int32_t seqLoadRemoteData(SOperatorInfo* pOperator) { doSendFetchDataRequest(pExchangeInfo, pTaskInfo, pExchangeInfo->current); tsem_wait(&pExchangeInfo->ready); if (isTaskKilled(pTaskInfo)) { - longjmp(pTaskInfo->env, TSDB_CODE_TSC_QUERY_CANCELLED); + longjmp(pTaskInfo->env, pTaskInfo->code); } SDownstreamSourceNode* pSource = taosArrayGet(pExchangeInfo->pSources, pExchangeInfo->current); diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 10ceb9ccee3cbf4f678d611f8f7f049745763e93..ebd1afa85539eb9f2e660c3707b16a2c97e32240 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -688,7 +688,7 @@ void qStopTaskOperators(SExecTaskInfo* pTaskInfo) { taosWUnLockLatch(&pTaskInfo->stopInfo.lock); } -int32_t qAsyncKillTask(qTaskInfo_t qinfo) { +int32_t qAsyncKillTask(qTaskInfo_t qinfo, int32_t rspCode) { SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)qinfo; if (pTaskInfo == NULL) { @@ -697,7 +697,7 @@ int32_t qAsyncKillTask(qTaskInfo_t qinfo) { qDebug("%s execTask async killed", GET_TASKID(pTaskInfo)); - setTaskKilled(pTaskInfo); + setTaskKilled(pTaskInfo, rspCode); qStopTaskOperators(pTaskInfo); diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index d0d8b42442328e0268992a6383bc781ef06950c2..dd527058ceee4c32672fba04fd2983ba13d42ea9 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -610,21 +610,10 @@ void setBlockSMAInfo(SqlFunctionCtx* pCtx, SExprInfo* pExprInfo, SSDataBlock* pB } bool isTaskKilled(SExecTaskInfo* pTaskInfo) { - // query has been executed more than tsShellActivityTimer, and the retrieve has not arrived - // abort current query execution. - if (pTaskInfo->owner != 0 && - ((taosGetTimestampSec() - pTaskInfo->cost.start / 1000) > 10 * getMaximumIdleDurationSec()) - /*(!needBuildResAfterQueryComplete(pTaskInfo))*/) { - assert(pTaskInfo->cost.start != 0); - // qDebug("QInfo:%" PRIu64 " retrieve not arrive beyond %d ms, abort current query execution, start:%" PRId64 - // ", current:%d", pQInfo->qId, 1, pQInfo->startExecTs, taosGetTimestampSec()); - // return true; - } - - return false; + return (0 != pTaskInfo->code) ? true : false; } -void setTaskKilled(SExecTaskInfo* pTaskInfo) { pTaskInfo->code = TSDB_CODE_TSC_QUERY_CANCELLED; } +void setTaskKilled(SExecTaskInfo* pTaskInfo, int32_t rspCode) { pTaskInfo->code = rspCode; } ///////////////////////////////////////////////////////////////////////////////////////////// STimeWindow getAlignQueryTimeWindow(SInterval* pInterval, int32_t precision, int64_t key) { diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 7f0dec1959031dba37a078f519d5d7f31b62cece..cf0e7b532f1ce8ad0c31bdb2f055dd63a32fc13f 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -629,7 +629,7 @@ static SSDataBlock* doTableScanImpl(SOperatorInfo* pOperator) { while (tsdbNextDataBlock(pTableScanInfo->base.dataReader)) { if (isTaskKilled(pTaskInfo)) { - T_LONG_JMP(pTaskInfo->env, TSDB_CODE_TSC_QUERY_CANCELLED); + T_LONG_JMP(pTaskInfo->env, pTaskInfo->code); } // process this data block based on the probabilities @@ -2032,7 +2032,7 @@ static SSDataBlock* doRawScan(SOperatorInfo* pOperator) { if (pInfo->dataReader && tsdbNextDataBlock(pInfo->dataReader)) { if (isTaskKilled(pTaskInfo)) { - longjmp(pTaskInfo->env, TSDB_CODE_TSC_QUERY_CANCELLED); + longjmp(pTaskInfo->env, pTaskInfo->code); } int32_t rows = 0; @@ -2529,7 +2529,7 @@ static SSDataBlock* getTableDataBlockImpl(void* param) { STsdbReader* reader = pInfo->base.dataReader; while (tsdbNextDataBlock(reader)) { if (isTaskKilled(pTaskInfo)) { - T_LONG_JMP(pTaskInfo->env, TSDB_CODE_TSC_QUERY_CANCELLED); + T_LONG_JMP(pTaskInfo->env, pTaskInfo->code); } // process this data block based on the probabilities diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index d2d97f7b90c0a54b7f47586d13677e4c83e15f4e..15682a2cfe68768526bfce06edd50f0abbe86ce9 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -750,8 +750,8 @@ static bool isPrimaryKeyImpl(SNode* pExpr) { return (PRIMARYKEY_TIMESTAMP_COL_ID == ((SColumnNode*)pExpr)->colId); } else if (QUERY_NODE_FUNCTION == nodeType(pExpr)) { SFunctionNode* pFunc = (SFunctionNode*)pExpr; - if (FUNCTION_TYPE_SELECT_VALUE == pFunc->funcType || FUNCTION_TYPE_FIRST == pFunc->funcType || - FUNCTION_TYPE_LAST == pFunc->funcType) { + if (FUNCTION_TYPE_SELECT_VALUE == pFunc->funcType || FUNCTION_TYPE_GROUP_KEY == pFunc->funcType || + FUNCTION_TYPE_FIRST == pFunc->funcType || FUNCTION_TYPE_LAST == pFunc->funcType) { return isPrimaryKeyImpl(nodesListGetNode(pFunc->pParameterList, 0)); } else if (FUNCTION_TYPE_WSTART == pFunc->funcType || FUNCTION_TYPE_WEND == pFunc->funcType) { return true; diff --git a/source/libs/qworker/inc/qwInt.h b/source/libs/qworker/inc/qwInt.h index a0e04b6a19bd16cb98415278ea8748406e1cfa89..af361323a7731f13512af85139d3ccdfd955ebf0 100644 --- a/source/libs/qworker/inc/qwInt.h +++ b/source/libs/qworker/inc/qwInt.h @@ -363,7 +363,7 @@ int32_t qwAcquireTaskCtx(QW_FPARAMS_DEF, SQWTaskCtx **ctx); int32_t qwGetTaskCtx(QW_FPARAMS_DEF, SQWTaskCtx **ctx); int32_t qwAddAcquireTaskCtx(QW_FPARAMS_DEF, SQWTaskCtx **ctx); void qwReleaseTaskCtx(SQWorker *mgmt, void *ctx); -int32_t qwKillTaskHandle(SQWTaskCtx *ctx); +int32_t qwKillTaskHandle(SQWTaskCtx *ctx, int32_t rspCode); int32_t qwUpdateTaskStatus(QW_FPARAMS_DEF, int8_t status); int32_t qwDropTask(QW_FPARAMS_DEF); void qwSaveTbVersionInfo(qTaskInfo_t pTaskInfo, SQWTaskCtx *ctx); diff --git a/source/libs/qworker/src/qwUtil.c b/source/libs/qworker/src/qwUtil.c index 2c0a4072aeb51179b172d8181f220198fa00534b..86fd1d533cb251904428d696b31bfb1e4716fe77 100644 --- a/source/libs/qworker/src/qwUtil.c +++ b/source/libs/qworker/src/qwUtil.c @@ -279,14 +279,14 @@ void qwFreeTaskHandle(qTaskInfo_t *taskHandle) { } } -int32_t qwKillTaskHandle(SQWTaskCtx *ctx) { +int32_t qwKillTaskHandle(SQWTaskCtx *ctx, int32_t rspCode) { int32_t code = 0; // Note: free/kill may in RC qTaskInfo_t taskHandle = atomic_load_ptr(&ctx->taskHandle); if (taskHandle && atomic_val_compare_exchange_ptr(&ctx->taskHandle, taskHandle, NULL)) { qDebug("start to kill task"); - code = qAsyncKillTask(taskHandle); + code = qAsyncKillTask(taskHandle, rspCode); atomic_store_ptr(&ctx->taskHandle, taskHandle); } diff --git a/source/libs/qworker/src/qworker.c b/source/libs/qworker/src/qworker.c index 0890d10b65728e1dadf84c9aea3640dc090d6aa0..9a318df324704291f83b6a69594c06b8823cd053 100644 --- a/source/libs/qworker/src/qworker.c +++ b/source/libs/qworker/src/qworker.c @@ -411,7 +411,7 @@ int32_t qwHandlePrePhaseEvents(QW_FPARAMS_DEF, int8_t phase, SQWPhaseInput *inpu // qwBuildAndSendDropRsp(&ctx->ctrlConnInfo, code); // QW_TASK_DLOG("drop rsp send, handle:%p, code:%x - %s", ctx->ctrlConnInfo.handle, code, tstrerror(code)); - QW_ERR_JRET(TSDB_CODE_QRY_TASK_DROPPED); + QW_ERR_JRET(ctx->rspCode); } QW_ERR_JRET(qwUpdateTaskStatus(QW_FPARAMS(), JOB_TASK_STATUS_EXEC)); @@ -420,7 +420,7 @@ int32_t qwHandlePrePhaseEvents(QW_FPARAMS_DEF, int8_t phase, SQWPhaseInput *inpu case QW_PHASE_PRE_FETCH: { if (QW_EVENT_PROCESSED(ctx, QW_EVENT_DROP) || QW_EVENT_RECEIVED(ctx, QW_EVENT_DROP)) { QW_TASK_WLOG("task dropping or already dropped, phase:%s", qwPhaseStr(phase)); - QW_ERR_JRET(TSDB_CODE_QRY_TASK_DROPPED); + QW_ERR_JRET(ctx->rspCode); } if (QW_EVENT_RECEIVED(ctx, QW_EVENT_FETCH)) { @@ -442,7 +442,7 @@ int32_t qwHandlePrePhaseEvents(QW_FPARAMS_DEF, int8_t phase, SQWPhaseInput *inpu case QW_PHASE_PRE_CQUERY: { if (QW_EVENT_PROCESSED(ctx, QW_EVENT_DROP)) { QW_TASK_WLOG("task already dropped, phase:%s", qwPhaseStr(phase)); - QW_ERR_JRET(TSDB_CODE_QRY_TASK_DROPPED); + QW_ERR_JRET(ctx->rspCode); } if (ctx->rspCode) { @@ -456,7 +456,7 @@ int32_t qwHandlePrePhaseEvents(QW_FPARAMS_DEF, int8_t phase, SQWPhaseInput *inpu // qwBuildAndSendDropRsp(&ctx->ctrlConnInfo, code); // QW_TASK_DLOG("drop rsp send, handle:%p, code:%x - %s", ctx->ctrlConnInfo.handle, code, tstrerror(code)); - QW_ERR_JRET(TSDB_CODE_QRY_TASK_DROPPED); + QW_ERR_JRET(ctx->rspCode); } break; @@ -502,7 +502,7 @@ int32_t qwHandlePostPhaseEvents(QW_FPARAMS_DEF, int8_t phase, SQWPhaseInput *inp if (QW_EVENT_PROCESSED(ctx, QW_EVENT_DROP)) { QW_TASK_WLOG("task already dropped, phase:%s", qwPhaseStr(phase)); - QW_ERR_JRET(TSDB_CODE_QRY_TASK_DROPPED); + QW_ERR_JRET(ctx->rspCode); } if (QW_EVENT_RECEIVED(ctx, QW_EVENT_DROP)) { @@ -515,7 +515,7 @@ int32_t qwHandlePostPhaseEvents(QW_FPARAMS_DEF, int8_t phase, SQWPhaseInput *inp // QW_TASK_DLOG("drop rsp send, handle:%p, code:%x - %s", ctx->ctrlConnInfo.handle, code, tstrerror(code)); QW_ERR_JRET(qwDropTask(QW_FPARAMS())); - QW_ERR_JRET(TSDB_CODE_QRY_TASK_DROPPED); + QW_ERR_JRET(ctx->rspCode); } if (ctx->rspCode) { @@ -861,7 +861,7 @@ int32_t qwProcessDrop(QW_FPARAMS_DEF, SQWMsg *qwMsg) { } if (QW_QUERY_RUNNING(ctx)) { - QW_ERR_JRET(qwKillTaskHandle(ctx)); + QW_ERR_JRET(qwKillTaskHandle(ctx, TSDB_CODE_TSC_QUERY_CANCELLED)); qwUpdateTaskStatus(QW_FPARAMS(), JOB_TASK_STATUS_DROP); } else { QW_ERR_JRET(qwDropTask(QW_FPARAMS())); @@ -869,6 +869,7 @@ int32_t qwProcessDrop(QW_FPARAMS_DEF, SQWMsg *qwMsg) { } if (!dropped) { + QW_UPDATE_RSP_CODE(ctx, TSDB_CODE_TSC_QUERY_CANCELLED); QW_SET_EVENT_RECEIVED(ctx, QW_EVENT_DROP); } @@ -1195,8 +1196,9 @@ void qWorkerStopAllTasks(void *qWorkerMgmt) { } if (QW_QUERY_RUNNING(ctx)) { - qwKillTaskHandle(ctx); + qwKillTaskHandle(ctx, TSDB_CODE_VND_STOPPED); } else if (!QW_EVENT_PROCESSED(ctx, QW_EVENT_DROP)) { + QW_UPDATE_RSP_CODE(ctx, TSDB_CODE_VND_STOPPED); QW_SET_EVENT_RECEIVED(ctx, QW_EVENT_DROP); } diff --git a/source/libs/qworker/test/qworkerTests.cpp b/source/libs/qworker/test/qworkerTests.cpp index 8a48977c777af8d794a38ed1721319c4b0646953..02b341e28c5684868e32354de199eb0be252b487 100644 --- a/source/libs/qworker/test/qworkerTests.cpp +++ b/source/libs/qworker/test/qworkerTests.cpp @@ -302,7 +302,7 @@ int32_t qwtExecTask(qTaskInfo_t tinfo, SSDataBlock **pRes, uint64_t *useconds) { return 0; } -int32_t qwtKillTask(qTaskInfo_t qinfo) { return 0; } +int32_t qwtKillTask(qTaskInfo_t qinfo, int32_t rspCode) { return 0; } void qwtDestroyTask(qTaskInfo_t qHandle) {} diff --git a/source/libs/scheduler/src/schTask.c b/source/libs/scheduler/src/schTask.c index 9a7f3332b3de8f1577d998d85966e0d166190347..c8b8db367fd4875b979e3e8de7b103454b07cc55 100644 --- a/source/libs/scheduler/src/schTask.c +++ b/source/libs/scheduler/src/schTask.c @@ -391,6 +391,8 @@ int32_t schChkUpdateRedirectCtx(SSchJob *pJob, SSchTask *pTask, SEpSet *pEpSet) int64_t leftTime = tsMaxRetryWaitTime - lastTime; pTask->delayExecMs = leftTime < pCtx->periodMs ? leftTime : pCtx->periodMs; + pCtx->roundTimes = 0; + goto _return; } @@ -407,7 +409,7 @@ _return: int32_t schDoTaskRedirect(SSchJob *pJob, SSchTask *pTask, SDataBuf *pData, int32_t rspCode) { int32_t code = 0; - SCH_TASK_DLOG("task will be redirected now, status:%s", SCH_GET_TASK_STATUS_STR(pTask)); + SCH_TASK_DLOG("task will be redirected now, status:%s, code:%s", SCH_GET_TASK_STATUS_STR(pTask), tstrerror(rspCode)); if (NULL == pData) { pTask->retryTimes = 0; @@ -430,15 +432,15 @@ int32_t schDoTaskRedirect(SSchJob *pJob, SSchTask *pTask, SDataBuf *pData, int32 if (SCH_IS_DATA_BIND_TASK(pTask)) { if (pData && pData->pEpSet) { SCH_ERR_JRET(schUpdateTaskCandidateAddr(pJob, pTask, pData->pEpSet)); - } else if (SYNC_UNKNOWN_LEADER_REDIRECT_ERROR(rspCode)) { + } else if (SYNC_SELF_LEADER_REDIRECT_ERROR(rspCode)) { SQueryNodeAddr *addr = taosArrayGet(pTask->candidateAddrs, pTask->candidateIdx); - SCH_SWITCH_EPSET(addr); - SCH_TASK_DLOG("switch task target node %d epset to %d/%d", addr->nodeId, addr->epSet.inUse, addr->epSet.numOfEps); + SEp *pEp = &addr->epSet.eps[addr->epSet.inUse]; + SCH_TASK_DLOG("task retry node %d current ep, idx:%d/%d,%s:%d, code:%s", addr->nodeId, addr->epSet.inUse, + addr->epSet.numOfEps, pEp->fqdn, pEp->port, tstrerror(rspCode)); } else { SQueryNodeAddr *addr = taosArrayGet(pTask->candidateAddrs, pTask->candidateIdx); - SEp *pEp = &addr->epSet.eps[addr->epSet.inUse]; - SCH_TASK_DLOG("task retry node %d current ep, idx:%d/%d,%s:%d", addr->nodeId, addr->epSet.inUse, - addr->epSet.numOfEps, pEp->fqdn, pEp->port); + SCH_SWITCH_EPSET(addr); + SCH_TASK_DLOG("switch task target node %d epset to %d/%d", addr->nodeId, addr->epSet.inUse, addr->epSet.numOfEps); } if (SCH_TASK_NEED_FLOW_CTRL(pJob, pTask)) { diff --git a/source/libs/stream/inc/streamInc.h b/source/libs/stream/inc/streamInc.h index 5ff49502df60401e1f08ea7aeeaf37f66e717e19..66496f11f8b2da8ad46d91637efdeb1444f26319 100644 --- a/source/libs/stream/inc/streamInc.h +++ b/source/libs/stream/inc/streamInc.h @@ -17,7 +17,6 @@ #define _STREAM_INC_H_ #include "executor.h" -#include "tref.h" #include "tstream.h" #ifdef __cplusplus @@ -25,9 +24,8 @@ extern "C" { #endif typedef struct { - int8_t inited; - int32_t refPool; - void* timer; + int8_t inited; + void* timer; } SStreamGlobalEnv; static SStreamGlobalEnv streamEnv; diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c new file mode 100644 index 0000000000000000000000000000000000000000..efd19074da1b2f51e1217b3f7ab359f2b4a33c95 --- /dev/null +++ b/source/libs/stream/src/streamCheckpoint.c @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "streamInc.h" + +int32_t tEncodeSStreamCheckpointSourceReq(SEncoder* pEncoder, const SStreamCheckpointSourceReq* pReq) { + if (tStartEncode(pEncoder) < 0) return -1; + if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1; + if (tEncodeI64(pEncoder, pReq->checkpointId) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->taskId) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->nodeId) < 0) return -1; + if (tEncodeI64(pEncoder, pReq->expireTime) < 0) return -1; + tEndEncode(pEncoder); + return pEncoder->pos; +} + +int32_t tDecodeSStreamCheckpointSourceReq(SDecoder* pDecoder, SStreamCheckpointSourceReq* pReq) { + if (tStartDecode(pDecoder) < 0) return -1; + if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1; + if (tDecodeI64(pDecoder, &pReq->checkpointId) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->taskId) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->nodeId) < 0) return -1; + if (tDecodeI64(pDecoder, &pReq->expireTime) < 0) return -1; + tEndDecode(pDecoder); + return 0; +} + +int32_t tEncodeSStreamCheckpointSourceRsp(SEncoder* pEncoder, const SStreamCheckpointSourceRsp* pRsp) { + if (tStartEncode(pEncoder) < 0) return -1; + if (tEncodeI64(pEncoder, pRsp->streamId) < 0) return -1; + if (tEncodeI64(pEncoder, pRsp->checkpointId) < 0) return -1; + if (tEncodeI32(pEncoder, pRsp->taskId) < 0) return -1; + if (tEncodeI32(pEncoder, pRsp->nodeId) < 0) return -1; + if (tEncodeI64(pEncoder, pRsp->expireTime) < 0) return -1; + tEndEncode(pEncoder); + return pEncoder->pos; +} + +int32_t tDecodeSStreamCheckpointSourceRsp(SDecoder* pDecoder, SStreamCheckpointSourceRsp* pRsp) { + if (tStartDecode(pDecoder) < 0) return -1; + if (tDecodeI64(pDecoder, &pRsp->streamId) < 0) return -1; + if (tDecodeI64(pDecoder, &pRsp->checkpointId) < 0) return -1; + if (tDecodeI32(pDecoder, &pRsp->taskId) < 0) return -1; + if (tDecodeI32(pDecoder, &pRsp->nodeId) < 0) return -1; + if (tDecodeI64(pDecoder, &pRsp->expireTime) < 0) return -1; + tEndDecode(pDecoder); + return 0; +} + +int32_t tEncodeSStreamCheckpointReq(SEncoder* pEncoder, const SStreamCheckpointReq* pReq) { + if (tStartEncode(pEncoder) < 0) return -1; + if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1; + if (tEncodeI64(pEncoder, pReq->checkpointId) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->downstreamTaskId) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->downstreamNodeId) < 0) return -1; + if (tEncodeI64(pEncoder, pReq->upstreamTaskId) < 0) return -1; + if (tEncodeI64(pEncoder, pReq->upstreamNodeId) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->childId) < 0) return -1; + if (tEncodeI64(pEncoder, pReq->expireTime) < 0) return -1; + if (tEncodeI8(pEncoder, pReq->taskLevel) < 0) return -1; + tEndEncode(pEncoder); + return pEncoder->pos; +} + +int32_t tDecodeSStreamCheckpointReq(SDecoder* pDecoder, SStreamCheckpointReq* pReq) { + if (tStartDecode(pDecoder) < 0) return -1; + if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1; + if (tDecodeI64(pDecoder, &pReq->checkpointId) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->downstreamTaskId) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->downstreamNodeId) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->upstreamTaskId) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->upstreamNodeId) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->childId) < 0) return -1; + if (tDecodeI64(pDecoder, &pReq->expireTime) < 0) return -1; + if (tDecodeI8(pDecoder, &pReq->taskLevel) < 0) return -1; + tEndDecode(pDecoder); + return 0; +} + +int32_t tEncodeSStreamCheckpointRsp(SEncoder* pEncoder, const SStreamCheckpointRsp* pRsp) { + if (tStartEncode(pEncoder) < 0) return -1; + if (tEncodeI64(pEncoder, pRsp->streamId) < 0) return -1; + if (tEncodeI64(pEncoder, pRsp->checkpointId) < 0) return -1; + if (tEncodeI32(pEncoder, pRsp->downstreamTaskId) < 0) return -1; + if (tEncodeI32(pEncoder, pRsp->downstreamNodeId) < 0) return -1; + if (tEncodeI64(pEncoder, pRsp->upstreamTaskId) < 0) return -1; + if (tEncodeI64(pEncoder, pRsp->upstreamNodeId) < 0) return -1; + if (tEncodeI32(pEncoder, pRsp->childId) < 0) return -1; + if (tEncodeI64(pEncoder, pRsp->expireTime) < 0) return -1; + if (tEncodeI8(pEncoder, pRsp->taskLevel) < 0) return -1; + tEndEncode(pEncoder); + return pEncoder->pos; +} + +int32_t tDecodeSStreamCheckpointRsp(SDecoder* pDecoder, SStreamCheckpointRsp* pRsp) { + if (tStartDecode(pDecoder) < 0) return -1; + if (tDecodeI64(pDecoder, &pRsp->streamId) < 0) return -1; + if (tDecodeI64(pDecoder, &pRsp->checkpointId) < 0) return -1; + if (tDecodeI32(pDecoder, &pRsp->downstreamTaskId) < 0) return -1; + if (tDecodeI32(pDecoder, &pRsp->downstreamNodeId) < 0) return -1; + if (tDecodeI32(pDecoder, &pRsp->upstreamTaskId) < 0) return -1; + if (tDecodeI32(pDecoder, &pRsp->upstreamNodeId) < 0) return -1; + if (tDecodeI32(pDecoder, &pRsp->childId) < 0) return -1; + if (tDecodeI64(pDecoder, &pRsp->expireTime) < 0) return -1; + if (tDecodeI8(pDecoder, &pRsp->taskLevel) < 0) return -1; + tEndDecode(pDecoder); + return 0; +} + +static int32_t streamAlignCheckpoint(SStreamTask* pTask, int64_t checkpointId, int32_t childId) { + if (pTask->checkpointingId == 0) { + pTask->checkpointingId = checkpointId; + pTask->checkpointAlignCnt = taosArrayGetSize(pTask->childEpInfo); + } + + ASSERT(pTask->checkpointingId == checkpointId); + + return atomic_sub_fetch_32(&pTask->checkpointAlignCnt, 1); +} + +static int32_t streamDoCheckpoint(SStreamMeta* pMeta, SStreamTask* pTask, int64_t checkpointId) { + // commit tdb state + streamStateCommit(pTask->pState); + // commit non-tdb state + // copy and save new state + // report to mnode + // send checkpoint req to downstream + return 0; +} + +static int32_t streamDoSourceCheckpoint(SStreamMeta* pMeta, SStreamTask* pTask, int64_t checkpointId) { + // ref wal + // set status checkpointing + // do checkpoint + return 0; +} +int32_t streamProcessCheckpointSourceReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointSourceReq* pReq) { + int32_t code; + int64_t checkpointId = pReq->checkpointId; + + code = streamDoSourceCheckpoint(pMeta, pTask, checkpointId); + if (code < 0) { + // rsp error + return -1; + } + + return 0; +} + +int32_t streamProcessCheckpointReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointReq* pReq) { + int32_t code; + int64_t checkpointId = pReq->checkpointId; + int32_t childId = pReq->childId; + + if (taosArrayGetSize(pTask->childEpInfo) > 0) { + code = streamAlignCheckpoint(pTask, checkpointId, childId); + if (code > 0) { + return 0; + } + if (code < 0) { + ASSERT(0); + return -1; + } + } + + code = streamDoCheckpoint(pMeta, pTask, checkpointId); + if (code < 0) { + // rsp error + return -1; + } + + // send rsp to all children + + return 0; +} + +int32_t streamProcessCheckpointRsp(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointRsp* pRsp) { + // recover step2, scan from wal + // unref wal + // set status normal + return 0; +} diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index edc16b60623e887483d920a7e2fd06a1787421de..afad78c5e57724809500905e35be136c9354885f 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -202,7 +202,7 @@ void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask) { } } -void streamMetaRemoveTask1(SStreamMeta* pMeta, int32_t taskId) { +void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) { SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t)); if (ppTask) { SStreamTask* pTask = *ppTask; @@ -219,35 +219,6 @@ void streamMetaRemoveTask1(SStreamMeta* pMeta, int32_t taskId) { } } -int32_t streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) { - SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t)); - if (ppTask) { - SStreamTask* pTask = *ppTask; - taosHashRemove(pMeta->pTasks, &taskId, sizeof(int32_t)); - atomic_store_8(&pTask->taskStatus, TASK_STATUS__DROPPING); - - if (tdbTbDelete(pMeta->pTaskDb, &taskId, sizeof(int32_t), pMeta->txn) < 0) { - /*return -1;*/ - } - - if (pTask->triggerParam != 0) { - taosTmrStop(pTask->timer); - } - - while (1) { - int8_t schedStatus = - atomic_val_compare_exchange_8(&pTask->schedStatus, TASK_SCHED_STATUS__INACTIVE, TASK_SCHED_STATUS__DROPPING); - if (schedStatus != TASK_SCHED_STATUS__ACTIVE) { - tFreeSStreamTask(pTask); - break; - } - taosMsleep(10); - } - } - - return 0; -} - int32_t streamMetaBegin(SStreamMeta* pMeta) { if (tdbBegin(pMeta->db, &pMeta->txn, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { diff --git a/source/libs/stream/src/streamRecover.c b/source/libs/stream/src/streamRecover.c index 7eee95a580cfc9ee01767998fcc9aabe418682ba..6889a870d19b2ec01f2f77ab57d8e70866599053 100644 --- a/source/libs/stream/src/streamRecover.c +++ b/source/libs/stream/src/streamRecover.c @@ -325,46 +325,3 @@ int32_t tDecodeSStreamRecoverFinishReq(SDecoder* pDecoder, SStreamRecoverFinishR tEndDecode(pDecoder); return 0; } - -int32_t tEncodeSStreamCheckpointInfo(SEncoder* pEncoder, const SStreamCheckpointInfo* pCheckpoint) { - if (tEncodeI32(pEncoder, pCheckpoint->srcNodeId) < 0) return -1; - if (tEncodeI32(pEncoder, pCheckpoint->srcChildId) < 0) return -1; - if (tEncodeI64(pEncoder, pCheckpoint->stateProcessedVer) < 0) return -1; - return 0; -} - -int32_t tDecodeSStreamCheckpointInfo(SDecoder* pDecoder, SStreamCheckpointInfo* pCheckpoint) { - if (tDecodeI32(pDecoder, &pCheckpoint->srcNodeId) < 0) return -1; - if (tDecodeI32(pDecoder, &pCheckpoint->srcChildId) < 0) return -1; - if (tDecodeI64(pDecoder, &pCheckpoint->stateProcessedVer) < 0) return -1; - return 0; -} - -int32_t tEncodeSStreamMultiVgCheckpointInfo(SEncoder* pEncoder, const SStreamMultiVgCheckpointInfo* pCheckpoint) { - if (tEncodeI64(pEncoder, pCheckpoint->streamId) < 0) return -1; - if (tEncodeI64(pEncoder, pCheckpoint->checkTs) < 0) return -1; - if (tEncodeI32(pEncoder, pCheckpoint->checkpointId) < 0) return -1; - if (tEncodeI32(pEncoder, pCheckpoint->taskId) < 0) return -1; - int32_t sz = taosArrayGetSize(pCheckpoint->checkpointVer); - if (tEncodeI32(pEncoder, sz) < 0) return -1; - for (int32_t i = 0; i < sz; i++) { - SStreamCheckpointInfo* pOneVgCkpoint = taosArrayGet(pCheckpoint->checkpointVer, i); - if (tEncodeSStreamCheckpointInfo(pEncoder, pOneVgCkpoint) < 0) return -1; - } - return 0; -} - -int32_t tDecodeSStreamMultiVgCheckpointInfo(SDecoder* pDecoder, SStreamMultiVgCheckpointInfo* pCheckpoint) { - if (tDecodeI64(pDecoder, &pCheckpoint->streamId) < 0) return -1; - if (tDecodeI64(pDecoder, &pCheckpoint->checkTs) < 0) return -1; - if (tDecodeI32(pDecoder, &pCheckpoint->checkpointId) < 0) return -1; - if (tDecodeI32(pDecoder, &pCheckpoint->taskId) < 0) return -1; - int32_t sz; - if (tDecodeI32(pDecoder, &sz) < 0) return -1; - for (int32_t i = 0; i < sz; i++) { - SStreamCheckpointInfo oneVgCheckpoint; - if (tDecodeSStreamCheckpointInfo(pDecoder, &oneVgCheckpoint) < 0) return -1; - taosArrayPush(pCheckpoint->checkpointVer, &oneVgCheckpoint); - } - return 0; -} diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index 03306af925318866539cd6c300975b7ae95fee67..f0be976402a95bdbcc0bcfe1de177516851e840d 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -74,7 +74,7 @@ SyncTerm syncLogReplMgrGetPrevLogTerm(SSyncLogReplMgr* pMgr, SSyncNode* pNode, S SyncTerm prevLogTerm = -1; terrno = TSDB_CODE_SUCCESS; - if (prevIndex == -1) return 0; + if (prevIndex == -1 && pNode->pLogStore->syncLogBeginIndex(pNode->pLogStore) == 0) return 0; if (prevIndex > pBuf->matchIndex) { terrno = TSDB_CODE_WAL_LOG_NOT_EXIST; @@ -691,7 +691,6 @@ int32_t syncLogReplMgrProcessReplyInRecoveryMode(SSyncLogReplMgr* pMgr, SSyncNod if (pMsg->matchIndex < pNode->pLogBuf->matchIndex) { term = syncLogReplMgrGetPrevLogTerm(pMgr, pNode, index + 1); - if (term < 0 || (term != pMsg->lastMatchTerm && (index + 1 == firstVer || index == firstVer))) { ASSERT(term >= 0 || terrno == TSDB_CODE_WAL_LOG_NOT_EXIST); if (syncNodeStartSnapshot(pNode, &destId) < 0) { diff --git a/source/libs/transport/inc/transComm.h b/source/libs/transport/inc/transComm.h index 479c1a5af7587b70cb9e1f8943d1c0af6790c137..bf9a6c005103c76a6e8201d4833870527d107f36 100644 --- a/source/libs/transport/inc/transComm.h +++ b/source/libs/transport/inc/transComm.h @@ -151,8 +151,8 @@ typedef struct { int64_t retryNextInterval; bool retryInit; int32_t retryStep; - - int8_t epsetRetryCnt; + int8_t epsetRetryCnt; + int32_t retryCode; int hThrdIdx; } STransConnCtx; diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index fbcc1fb525c5cc02410bd3f88a8530aa820bd01b..a9afbd7ba85ecca5e6951173c66dea43e55f3725 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1020,7 +1020,6 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd) { char tbuf[256] = {0}; EPSET_DEBUG_STR(&pCtx->epSet, tbuf); - tDebug("current epset %s", tbuf); if (!EPSET_IS_VALID(&pCtx->epSet)) { tError("invalid epset"); @@ -1500,34 +1499,46 @@ bool cliGenRetryRule(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { pCtx->retryNextInterval = pCtx->retryMinInterval; pCtx->retryStep = 0; pCtx->retryInit = true; + pCtx->retryCode = TSDB_CODE_SUCCESS; } + if (-1 != pCtx->retryMaxTimeout && taosGetTimestampMs() - pCtx->retryInitTimestamp >= pCtx->retryMaxTimeout) { return false; } + // code, msgType + + // A: epset, leader, not self + // B: epset, not know leader + // C: no epset, leader but not serivce + bool noDelay = false; if (code == TSDB_CODE_RPC_BROKEN_LINK || code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { - tDebug("code str %s, contlen:%d 0", tstrerror(code), pResp->contLen); + tTrace("code str %s, contlen:%d 0", tstrerror(code), pResp->contLen); noDelay = cliResetEpset(pCtx, pResp, false); transFreeMsg(pResp->pCont); transUnrefCliHandle(pConn); } else if (code == TSDB_CODE_SYN_NOT_LEADER || code == TSDB_CODE_SYN_INTERNAL_ERROR || - code == TSDB_CODE_SYN_PROPOSE_NOT_READY || code == TSDB_CODE_RPC_REDIRECT) { - tDebug("code str %s, contlen:%d 1", tstrerror(code), pResp->contLen); + code == TSDB_CODE_SYN_PROPOSE_NOT_READY || code == TSDB_CODE_RPC_REDIRECT || code == TSDB_CODE_VND_STOPPED) { + tTrace("code str %s, contlen:%d 1", tstrerror(code), pResp->contLen); noDelay = cliResetEpset(pCtx, pResp, true); transFreeMsg(pResp->pCont); addConnToPool(pThrd->pool, pConn); } else if (code == TSDB_CODE_SYN_RESTORING) { - tDebug("code str %s, contlen:%d 0", tstrerror(code), pResp->contLen); + tTrace("code str %s, contlen:%d 0", tstrerror(code), pResp->contLen); noDelay = cliResetEpset(pCtx, pResp, false); addConnToPool(pThrd->pool, pConn); transFreeMsg(pResp->pCont); } else { - tDebug("code str %s, contlen:%d 0", tstrerror(code), pResp->contLen); + tTrace("code str %s, contlen:%d 0", tstrerror(code), pResp->contLen); noDelay = cliResetEpset(pCtx, pResp, false); addConnToPool(pThrd->pool, pConn); transFreeMsg(pResp->pCont); } + if (code != TSDB_CODE_RPC_BROKEN_LINK && code != TSDB_CODE_RPC_NETWORK_UNAVAIL && code != TSDB_CODE_SUCCESS) { + // save one internal code + pCtx->retryCode = code; + } if (noDelay == false) { pCtx->epsetRetryCnt = 1; @@ -1556,29 +1567,36 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { STrans* pTransInst = pThrd->pTransInst; if (pMsg == NULL || pMsg->ctx == NULL) { - tDebug("%s conn %p handle resp", pTransInst->label, pConn); + tTrace("%s conn %p handle resp", pTransInst->label, pConn); pTransInst->cfp(pTransInst->parent, pResp, NULL); return 0; } STransConnCtx* pCtx = pMsg->ctx; - int32_t code = pResp->code; bool retry = cliGenRetryRule(pConn, pResp, pMsg); if (retry == true) { return -1; } - STraceId* trace = &pResp->info.traceId; - bool hasEpSet = cliTryExtractEpSet(pResp, &pCtx->epSet); + if (pCtx->retryCode != TSDB_CODE_SUCCESS) { + int32_t code = pResp->code; + // return internal code app + if (code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_RPC_BROKEN_LINK) { + pResp->code = pCtx->retryCode; + } + } + + STraceId* trace = &pResp->info.traceId; + bool hasEpSet = cliTryExtractEpSet(pResp, &pCtx->epSet); if (hasEpSet) { char tbuf[256] = {0}; EPSET_DEBUG_STR(&pCtx->epSet, tbuf); - tGDebug("%s conn %p extract epset from msg", CONN_GET_INST_LABEL(pConn), pConn); + tGTrace("%s conn %p extract epset from msg", CONN_GET_INST_LABEL(pConn), pConn); } if (pCtx->pSem != NULL) { - tGDebug("%s conn %p(sync) handle resp", CONN_GET_INST_LABEL(pConn), pConn); + tGTrace("%s conn %p(sync) handle resp", CONN_GET_INST_LABEL(pConn), pConn); if (pCtx->pRsp == NULL) { tGTrace("%s conn %p(sync) failed to resp, ignore", CONN_GET_INST_LABEL(pConn), pConn); } else { @@ -1587,11 +1605,11 @@ int cliAppCb(SCliConn* pConn, STransMsg* pResp, SCliMsg* pMsg) { tsem_post(pCtx->pSem); pCtx->pRsp = NULL; } else { - tGDebug("%s conn %p handle resp", CONN_GET_INST_LABEL(pConn), pConn); + tGTrace("%s conn %p handle resp", CONN_GET_INST_LABEL(pConn), pConn); if (retry == false && hasEpSet == true) { pTransInst->cfp(pTransInst->parent, pResp, &pCtx->epSet); } else { - if (!cliIsEpsetUpdated(code, pCtx)) { + if (!cliIsEpsetUpdated(pResp->code, pCtx)) { pTransInst->cfp(pTransInst->parent, pResp, NULL); } else { pTransInst->cfp(pTransInst->parent, pResp, &pCtx->epSet); diff --git a/source/os/src/osSemaphore.c b/source/os/src/osSemaphore.c index bfce8b3151ffe051398d89ba5eada720b897c105..2f947d325263d45025a7cff835a715ac108674e7 100644 --- a/source/os/src/osSemaphore.c +++ b/source/os/src/osSemaphore.c @@ -75,20 +75,16 @@ int32_t tsem_wait(tsem_t* sem) { return ret; } -int32_t tsem_timewait(tsem_t* sem, int64_t milis) { - return 0; - /*return tsem_wait(sem);*/ -#if 0 +int32_t tsem_timewait(tsem_t* sem, int64_t ms) { struct timespec ts; - timespec_get(&ts); + taosClockGetTime(0, &ts); + ts.tv_nsec += ms * 1000000; ts.tv_sec += ts.tv_nsec / 1000000000; ts.tv_nsec %= 1000000000; - - /*GetSystemTimeAsFileTime(&ft_before);*/ - // errno = 0; - rc = sem_timedwait(sem, ts); - + int rc; + while ((rc = sem_timedwait(sem, &ts)) == -1 && errno == EINTR) continue; + return rc; /* This should have timed out */ // assert(errno == ETIMEDOUT); // assert(rc != 0); @@ -103,8 +99,6 @@ int32_t tsem_timewait(tsem_t* sem, int64_t milis) { // printf("time must advance during sem_timedwait."); // return 1; // } - return rc; -#endif } #elif defined(_TD_DARWIN_64) diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 2025f196f293f0654a5f3ebacb6875dc9f560e9a..f5d11e3e960c802b92123abb8d507b85c8348342 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -67,6 +67,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_REF_ID_REMOVED, "Ref ID is removed") TAOS_DEFINE_ERROR(TSDB_CODE_REF_INVALID_ID, "Invalid Ref ID") TAOS_DEFINE_ERROR(TSDB_CODE_REF_ALREADY_EXIST, "Ref is already there") TAOS_DEFINE_ERROR(TSDB_CODE_REF_NOT_EXIST, "Ref is not there") + TAOS_DEFINE_ERROR(TSDB_CODE_APP_ERROR, "Unexpected generic error") TAOS_DEFINE_ERROR(TSDB_CODE_ACTION_IN_PROGRESS, "Action in progress") TAOS_DEFINE_ERROR(TSDB_CODE_OUT_OF_RANGE, "Out of range") @@ -83,6 +84,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_INVALID_VERSION_NUMBER, "Invalid version numbe TAOS_DEFINE_ERROR(TSDB_CODE_INVALID_VERSION_STRING, "Invalid version string") TAOS_DEFINE_ERROR(TSDB_CODE_VERSION_NOT_COMPATIBLE, "Version not compatible") TAOS_DEFINE_ERROR(TSDB_CODE_CHECKSUM_ERROR, "Checksum error") + TAOS_DEFINE_ERROR(TSDB_CODE_COMPRESS_ERROR, "Failed to compress msg") TAOS_DEFINE_ERROR(TSDB_CODE_MSG_NOT_PROCESSED, "Message not processed") TAOS_DEFINE_ERROR(TSDB_CODE_CFG_NOT_FOUND, "Config not found") @@ -97,6 +99,9 @@ TAOS_DEFINE_ERROR(TSDB_CODE_NOT_FOUND, "Not found") TAOS_DEFINE_ERROR(TSDB_CODE_NO_DISKSPACE, "Out of disk space") TAOS_DEFINE_ERROR(TSDB_CODE_TIMEOUT_ERROR, "Operation timeout") +TAOS_DEFINE_ERROR(TSDB_CODE_APP_IS_STARTING, "Database is starting up") +TAOS_DEFINE_ERROR(TSDB_CODE_APP_IS_STOPPING, "Database is closing down") + //client TAOS_DEFINE_ERROR(TSDB_CODE_TSC_INVALID_OPERATION, "Invalid operation") TAOS_DEFINE_ERROR(TSDB_CODE_TSC_INVALID_QHANDLE, "Invalid qhandle") @@ -316,6 +321,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_VND_COL_ALREADY_EXISTS, "Table column already TAOS_DEFINE_ERROR(TSDB_CODE_VND_COL_NOT_EXISTS, "Table column not exists") TAOS_DEFINE_ERROR(TSDB_CODE_VND_COL_SUBSCRIBED, "Table column is subscribed") TAOS_DEFINE_ERROR(TSDB_CODE_VND_NO_AVAIL_BUFPOOL, "No availabe buffer pool") +TAOS_DEFINE_ERROR(TSDB_CODE_VND_STOPPED, "Vnode stopped") // tsdb TAOS_DEFINE_ERROR(TSDB_CODE_TDB_INVALID_TABLE_ID, "Invalid table ID")