diff --git a/CMakeLists.txt b/CMakeLists.txt index 0436f5b25923927edaa7568ba57c7b948446f8b1..8fe6cc69e05e47b9d9ecfd1b8534d215f597e033 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.16) +cmake_minimum_required(VERSION 3.0) project( TDengine diff --git a/cmake/cmake.define b/cmake/cmake.define index a8bab17aba8a412099b34c6d82c9787468bc89e8..5637c666b99294a536288741948877450cdb9eb5 100644 --- a/cmake/cmake.define +++ b/cmake/cmake.define @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.16) +cmake_minimum_required(VERSION 3.0) set(CMAKE_VERBOSE_MAKEFILE OFF) diff --git a/cmake/cmake.platform b/cmake/cmake.platform index 0312f92a5b4116cad03d4bb9c2e7556d7a35deb2..acf17e9427bc453e1ece67cca5cbfe45f8827337 100644 --- a/cmake/cmake.platform +++ b/cmake/cmake.platform @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.16) +cmake_minimum_required(VERSION 3.0) MESSAGE("Current system is ${CMAKE_SYSTEM_NAME}") diff --git a/docs-cn/12-taos-sql/01-data-type.md b/docs-cn/12-taos-sql/01-data-type.md index be5c9a8cb4ed7f4ed9f9c7e11faf1b0f8f6e51b8..bdf20c7f1f58ccf4027049ffaafcdb352386cce9 100644 --- a/docs-cn/12-taos-sql/01-data-type.md +++ b/docs-cn/12-taos-sql/01-data-type.md @@ -4,6 +4,8 @@ title: 支持的数据类型 description: "TDengine 支持的数据类型: 时间戳、浮点型、JSON 类型等" --- +## 时间戳 + 使用 TDengine,最重要的是时间戳。创建并插入记录、查询历史记录的时候,均需要指定时间戳。时间戳有如下规则: - 时间格式为 `YYYY-MM-DD HH:mm:ss.MS`,默认时间分辨率为毫秒。比如:`2017-08-12 18:25:58.128` @@ -12,39 +14,54 @@ description: "TDengine 支持的数据类型: 时间戳、浮点型、JSON 类 - Epoch Time:时间戳也可以是一个长整数,表示从格林威治时间 1970-01-01 00:00:00.000 (UTC/GMT) 开始的毫秒数(相应地,如果所在 Database 的时间精度设置为“微秒”,则长整型格式的时间戳含义也就对应于从格林威治时间 1970-01-01 00:00:00.000 (UTC/GMT) 开始的微秒数;纳秒精度逻辑类似。) - 时间可以加减,比如 now-2h,表明查询时刻向前推 2 个小时(最近 2 小时)。数字后面的时间单位可以是 b(纳秒)、u(微秒)、a(毫秒)、s(秒)、m(分)、h(小时)、d(天)、w(周)。 比如 `select * from t1 where ts > now-2w and ts <= now-1w`,表示查询两周前整整一周的数据。在指定降采样操作(down sampling)的时间窗口(interval)时,时间单位还可以使用 n (自然月) 和 y (自然年)。 -TDengine 缺省的时间戳精度是毫秒,但通过在 `CREATE DATABASE` 时传递的 PRECISION 参数也可以支持微秒和纳秒。(从 2.1.5.0 版本开始支持纳秒精度) +TDengine 缺省的时间戳精度是毫秒,但通过在 `CREATE DATABASE` 时传递的 PRECISION 参数也可以支持微秒和纳秒。 ```sql CREATE DATABASE db_name PRECISION 'ns'; ``` +## 数据类型 -在 TDengine 中,普通表的数据模型中可使用以下 10 种数据类型。 +在 TDengine 中,普通表的数据模型中可使用以下数据类型。 | # | **类型** | **Bytes** | **说明** | | --- | :-------: | --------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| 1 | TIMESTAMP | 8 | 时间戳。缺省精度毫秒,可支持微秒和纳秒。从格林威治时间 1970-01-01 00:00:00.000 (UTC/GMT) 开始,计时不能早于该时间。(从 2.0.18.0 版本开始,已经去除了这一时间范围限制)(从 2.1.5.0 版本开始支持纳秒精度) | -| 2 | INT | 4 | 整型,范围 [-2^31+1, 2^31-1], -2^31 用作 NULL | -| 3 | BIGINT | 8 | 长整型,范围 [-2^63+1, 2^63-1], -2^63 用作 NULL | -| 4 | FLOAT | 4 | 浮点型,有效位数 6-7,范围 [-3.4E38, 3.4E38] | -| 5 | DOUBLE | 8 | 双精度浮点型,有效位数 15-16,范围 [-1.7E308, 1.7E308] | -| 6 | BINARY | 自定义 | 记录单字节字符串,建议只用于处理 ASCII 可见字符,中文等多字节字符需使用 nchar。理论上,最长可以有 16374 字节。binary 仅支持字符串输入,字符串两端需使用单引号引用。使用时须指定大小,如 binary(20) 定义了最长为 20 个单字节字符的字符串,每个字符占 1 byte 的存储空间,总共固定占用 20 bytes 的空间,此时如果用户字符串超出 20 字节将会报错。对于字符串内的单引号,可以用转义字符反斜线加单引号来表示,即 `\’`。 | -| 7 | SMALLINT | 2 | 短整型, 范围 [-32767, 32767], -32768 用作 NULL | -| 8 | TINYINT | 1 | 单字节整型,范围 [-127, 127], -128 用作 NULL | -| 9 | BOOL | 1 | 布尔型,{true, false} | -| 10 | NCHAR | 
自定义 | 记录包含多字节字符在内的字符串,如中文字符。每个 nchar 字符占用 4 bytes 的存储空间。字符串两端使用单引号引用,字符串内的单引号需用转义字符 `\’`。nchar 使用时须指定字符串大小,类型为 nchar(10) 的列表示此列的字符串最多存储 10 个 nchar 字符,会固定占用 40 bytes 的空间。如果用户字符串长度超出声明长度,将会报错。 |
-| 11 | JSON | | json 数据类型, 只有 tag 可以是 json 格式 |
-
-:::tip
-TDengine 对 SQL 语句中的英文字符不区分大小写,自动转化为小写执行。因此用户大小写敏感的字符串及密码,需要使用单引号将字符串引起来。
+| 1 | TIMESTAMP | 8 | 时间戳。缺省精度毫秒,可支持微秒和纳秒,详细说明见上节。 |
+| 2 | INT | 4 | 整型,范围 [-2^31, 2^31-1] |
+| 3 | INT UNSIGNED | 4 | 无符号整数,范围 [0, 2^32-1] |
+| 4 | BIGINT | 8 | 长整型,范围 [-2^63, 2^63-1] |
+| 5 | BIGINT UNSIGNED | 8 | 长整型,范围 [0, 2^64-1] |
+| 6 | FLOAT | 4 | 浮点型,有效位数 6-7,范围 [-3.4E38, 3.4E38] |
+| 7 | DOUBLE | 8 | 双精度浮点型,有效位数 15-16,范围 [-1.7E308, 1.7E308] |
+| 8 | BINARY | 自定义 | 记录单字节字符串,建议只用于处理 ASCII 可见字符,中文等多字节字符需使用 nchar。 |
+| 9 | SMALLINT | 2 | 短整型, 范围 [-32768, 32767] |
+| 10 | SMALLINT UNSIGNED | 2 | 无符号短整型,范围 [0, 65535] |
+| 11 | TINYINT | 1 | 单字节整型,范围 [-128, 127] |
+| 12 | TINYINT UNSIGNED | 1 | 无符号单字节整型,范围 [0, 255] |
+| 13 | BOOL | 1 | 布尔型,{true, false} |
+| 14 | NCHAR | 自定义 | 记录包含多字节字符在内的字符串,如中文字符。每个 nchar 字符占用 4 bytes 的存储空间。字符串两端使用单引号引用,字符串内的单引号需用转义字符 `\’`。nchar 使用时须指定字符串大小,类型为 nchar(10) 的列表示此列的字符串最多存储 10 个 nchar 字符,会固定占用 40 bytes 的空间。如果用户字符串长度超出声明长度,将会报错。 |
+| 15 | JSON | | json 数据类型, 只有 tag 可以是 json 格式 |
+| 16 | VARCHAR | 自定义 | BINARY类型的别名 |
 
-:::
 :::note
-虽然 BINARY 类型在底层存储上支持字节型的二进制字符,但不同编程语言对二进制数据的处理方式并不保证一致,因此建议在 BINARY 类型中只存储 ASCII 可见字符,而避免存储不可见字符。多字节的数据,例如中文字符,则需要使用 NCHAR 类型进行保存。如果强行使用 BINARY 类型保存中文字符,虽然有时也能正常读写,但并不带有字符集信息,很容易出现数据乱码甚至数据损坏等情况。
+- TDengine 对 SQL 语句中的英文字符不区分大小写,自动转化为小写执行。因此用户大小写敏感的字符串及密码,需要使用单引号将字符串引起来。
+- 虽然 BINARY 类型在底层存储上支持字节型的二进制字符,但不同编程语言对二进制数据的处理方式并不保证一致,因此建议在 BINARY 类型中只存储 ASCII 可见字符,而避免存储不可见字符。多字节的数据,例如中文字符,则需要使用 NCHAR 类型进行保存。如果强行使用 BINARY 类型保存中文字符,虽然有时也能正常读写,但并不带有字符集信息,很容易出现数据乱码甚至数据损坏等情况。
+- BINARY 类型理论上最长可以有 16374 字节。binary 仅支持字符串输入,字符串两端需使用单引号引用。使用时须指定大小,如 binary(20) 定义了最长为 20 个单字节字符的字符串,每个字符占 1 byte 的存储空间,总共固定占用 20 bytes 的空间,此时如果用户字符串超出 20 字节将会报错。对于字符串内的单引号,可以用转义字符反斜线加单引号来表示,即 `\’`。
+- SQL 语句中的数值类型将依据是否存在小数点,或使用科学计数法表示,来判断数值类型是否为整型或者浮点型,因此在使用时要注意相应类型越界的情况。例如,9999999999999999999 会认为超过长整型的上边界而溢出,而 9999999999999999999.0 会被认为是有效的浮点数。
 :::
 
-:::note
-SQL 语句中的数值类型将依据是否存在小数点,或使用科学计数法表示,来判断数值类型是否为整型或者浮点型,因此在使用时要注意相应类型越界的情况。例如,9999999999999999999 会认为超过长整型的上边界而溢出,而 9999999999999999999.0 会被认为是有效的浮点数。
-:::
+## 常量
+TDengine支持多个类型的常量,细节如下表:
+
+| # | **语法** | **类型** | **说明** |
+| --- | :-------: | --------- | -------------------------------------- |
+| 1 | [{+ \| -}]123 | BIGINT | 整型数值的字面量的类型均为BIGINT。如果用户输入超过了BIGINT的表示范围,TDengine 按BIGINT对数值进行截断。|
+| 2 | 123.45 | DOUBLE | 浮点数值的字面量的类型均为DOUBLE。TDengine依据是否存在小数点,或使用科学计数法表示,来判断数值类型是否为整型或者浮点型,因此在使用时要注意相应类型越界的情况。例如,9999999999999999999会认为超过长整型的上边界而溢出,而9999999999999999999.0会被认为是有效的浮点数。|
+| 3 | 1.2E3 | DOUBLE | 科学计数法的字面量的类型为DOUBLE。|
+| 4 | 'abc' | BINARY | 单引号括住的内容为字符串字面值,其类型为BINARY,BINARY的size为实际的字符个数。对于字符串内的单引号,可以用转义字符反斜线加单引号来表示,即 \'。|
+| 5 | "abc" | BINARY | 双引号括住的内容为字符串字面值,其类型为BINARY,BINARY的size为实际的字符个数。对于字符串内的双引号,可以用转义字符反斜线加双引号来表示,即 \"。 |
+| 6 | TIMESTAMP {'literal' \| "literal"} | TIMESTAMP | TIMESTAMP关键字表示后面的字符串字面量需要被解释为TIMESTAMP类型。字符串需要满足YYYY-MM-DD HH:mm:ss.MS格式,其时间分辨率为当前数据库的时间分辨率。 |
+| 7 | {TRUE \| FALSE} | BOOL | 布尔类型字面量。 |
+| 8 | {'' \| "" \| '\t' \| "\t" \| ' ' \| " " \| NULL } | -- | 空值字面量。可以用于任意类型。|
diff --git a/docs-cn/12-taos-sql/07-function.md b/docs-cn/12-taos-sql/07-function.md
index 78389609db6b471ed2fb7fc0628bb7dbe72f61b9..2349e6aa3c02eb62fba1fc7e4eef15e08e3924d1 100644
--- a/docs-cn/12-taos-sql/07-function.md
+++ b/docs-cn/12-taos-sql/07-function.md
@@ -321,6 +321,32 @@ taos> SELECT HISTOGRAM(voltage, 'log_bin', '{"start": 1, "factor": 3, "count": 3
 {"lower_bin":27, "upper_bin":inf, "count":1} |
 ```
 
+### ELAPSED
+
+```mysql
+SELECT ELAPSED(field_name[, time_unit]) FROM { tb_name | stb_name } [WHERE clause] [INTERVAL(interval [, offset]) [SLIDING sliding]];
+```
+
+**功能说明**:elapsed函数表达了统计周期内连续的时间长度,和twa函数配合使用可以计算统计曲线下的面积。在通过INTERVAL子句指定窗口的情况下,统计在给定时间范围内的每个窗口内有数据覆盖的时间范围;如果没有INTERVAL子句,则返回整个给定时间范围内的有数据覆盖的时间范围。注意,ELAPSED返回的并不是时间范围的绝对值,而是绝对值除以time_unit所得到的单位个数。
+
+**返回结果类型**:Double
+
+**应用字段**:Timestamp类型
+
+**支持的版本**:2.6.0.0 及以后的版本。
+
+**适用于**: 表,超级表,嵌套查询的外层查询
+
+**说明**:
+- field_name参数只能是表的第一列,即timestamp主键列。
+- 按time_unit参数指定的时间单位返回,最小是数据库的时间分辨率。time_unit参数未指定时,以数据库的时间分辨率为时间单位。
+- 可以和interval组合使用,返回每个时间窗口的时间戳差值。需要特别注意的是,除第一个时间窗口和最后一个时间窗口外,中间窗口的时间戳差值均为窗口长度。
+- order by asc/desc不影响差值的计算结果。
+- 对于超级表,需要和group by tbname子句组合使用,不可以直接使用。
+- 对于普通表,不支持和group by子句组合使用。
+- 对于嵌套查询,仅当内层查询会输出隐式时间戳列时有效。例如select elapsed(ts) from (select diff(value) from sub1)语句,diff函数会让内层查询输出隐式时间戳列,此为主键列,可以用于elapsed函数的第一个参数。相反,例如select elapsed(ts) from (select * from sub1) 语句,ts列输出到外层时已经没有了主键列的含义,无法使用elapsed函数。此外,elapsed函数作为一个与时间线强依赖的函数,形如select elapsed(ts) from (select diff(value) from st group by tbname)尽管会返回一条计算结果,但并无实际意义,这种用法后续也将被限制。
+- 不支持与leastsquares、diff、derivative、top、bottom、last_row、interp等函数混合使用。
+
 ## 选择函数
 
 在使用所有的选择函数的时候,可以同时指定输出 ts 列或标签列(包括 tbname),这样就可以方便地知道被选出的值是源于哪个数据行的。
diff --git a/docs-cn/12-taos-sql/12-keywords/index.md b/docs-cn/12-taos-sql/12-keywords/index.md
index a64a7161d03f28c131a07e5b1077d7b956e44007..0b9ec4de862fc6b6ade11e733a0f7b169a79a324 100644
--- a/docs-cn/12-taos-sql/12-keywords/index.md
+++ b/docs-cn/12-taos-sql/12-keywords/index.md
@@ -85,3 +85,45 @@ title: TDengine 参数限制与保留关键字
 | CONNECTIONS | HAVING | NOT | SOFFSET | VNODES |
 | CONNS | ID | NOTNULL | STABLE | WAL |
 | COPY | IF | NOW | STABLES | WHERE |
+| _C0 | _QSTART | _QSTOP | _QDURATION | _WSTART |
+| _WSTOP | _WDURATION |
+
+## 特殊说明
+### TBNAME
+`TBNAME` 可以视为超级表中一个特殊的标签,代表子表的表名。
+
+获取一个超级表所有的子表名及相关的标签信息:
+```mysql
+SELECT TBNAME, location FROM meters;
+```
+
+统计超级表下辖子表数量:
+```mysql
+SELECT COUNT(TBNAME) FROM meters;
+```
+
+以上两个查询均只支持在WHERE条件子句中添加针对标签(TAGS)的过滤条件。例如:
+```mysql
+taos> SELECT TBNAME, location FROM meters;
+ tbname | location |
+==================================================================
+ d1004 | California.SanFrancisco |
+ d1003 | California.SanFrancisco |
+ d1002 | California.LosAngeles |
+ d1001 | California.LosAngeles |
+Query OK, 4 row(s) in set (0.000881s)
+
+taos> SELECT COUNT(tbname) FROM meters WHERE groupId > 2;
+ count(tbname) |
+========================
+ 2 |
+Query OK, 1 row(s) in set (0.001091s)
+```
+### _QSTART/_QSTOP/_QDURATION
+表示查询过滤窗口的起始,结束以及持续时间 (从2.6.0.0版本开始支持)
+
+### _WSTART/_WSTOP/_WDURATION
+窗口切分聚合查询(例如 interval/session window/state window)中表示每个切分窗口的起始,结束以及持续时间(从 2.6.0.0 版本开始支持)
+
+### _c0
+表示表或超级表的第一列
\ No newline at end of file
diff --git a/docs-cn/20-third-party/11-kafka.md b/docs-cn/20-third-party/11-kafka.md
index 0de5b43a396fa3bb4aba558308f95cb0d6f96bc5..8369806adcfe1b195348e7d60160609cde9150e8 100644
--- a/docs-cn/20-third-party/11-kafka.md
+++ b/docs-cn/20-third-party/11-kafka.md
@@ -7,7 +7,7 @@ TDengine Kafka Connector 包含两个插件: TDengine Source Connector 和 TDeng
 
 ## 什么是 Kafka Connect?
-Kafka Connect 是 Apache Kafka 的一个组件,用于使其它系统,比如数据库、云服务、文件系统等能方便地连接到 Kafka。数据既可以通过 Kafka Connect 从其它系统流向 Kafka, 也可以通过 Kafka Connect 从 Kafka 流向其它系统。从其它系统读数据的插件称为 Source Connector, 写数据到其它系统的插件称为 Sink Connector。Source Connector 和 Sink Connector 都不会直接连接 Kafka Broker,Source Connector 把数据转交给 Kafka Connect。Sink Connector 从 Kafka Connect 接收数据。 +Kafka Connect 是 [Apache Kafka](https://kafka.apache.org/) 的一个组件,用于使其它系统,比如数据库、云服务、文件系统等能方便地连接到 Kafka。数据既可以通过 Kafka Connect 从其它系统流向 Kafka, 也可以通过 Kafka Connect 从 Kafka 流向其它系统。从其它系统读数据的插件称为 Source Connector, 写数据到其它系统的插件称为 Sink Connector。Source Connector 和 Sink Connector 都不会直接连接 Kafka Broker,Source Connector 把数据转交给 Kafka Connect。Sink Connector 从 Kafka Connect 接收数据。 ![TDengine Database Kafka Connector -- Kafka Connect structure](kafka/Kafka_Connect.webp) @@ -17,7 +17,7 @@ TDengine Source Connector 用于把数据实时地从 TDengine 读出来发送 ## 什么是 Confluent? -Confluent 在 Kafka 的基础上增加很多扩展功能。包括: +[Confluent](https://www.confluent.io/) 在 Kafka 的基础上增加很多扩展功能。包括: 1. Schema Registry 2. REST 代理 @@ -81,10 +81,10 @@ Development: false git clone https://github.com:taosdata/kafka-connect-tdengine.git cd kafka-connect-tdengine mvn clean package -unzip -d $CONFLUENT_HOME/share/confluent-hub-components/ target/components/packages/taosdata-kafka-connect-tdengine-0.1.0.zip +unzip -d $CONFLUENT_HOME/share/java/ target/components/packages/taosdata-kafka-connect-tdengine-*.zip ``` -以上脚本先 clone 项目源码,然后用 Maven 编译打包。打包完成后在 `target/components/packages/` 目录生成了插件的 zip 包。把这个 zip 包解压到安装插件的路径即可。安装插件的路径在配置文件 `$CONFLUENT_HOME/etc/kafka/connect-standalone.properties` 中。默认的路径为 `$CONFLUENT_HOME/share/confluent-hub-components/`。 +以上脚本先 clone 项目源码,然后用 Maven 编译打包。打包完成后在 `target/components/packages/` 目录生成了插件的 zip 包。把这个 zip 包解压到安装插件的路径即可。上面的示例中使用了内置的插件安装路径: `$CONFLUENT_HOME/share/java/`。 ### 用 confluent-hub 安装 @@ -98,7 +98,7 @@ confluent local services start ``` :::note -一定要先安装插件再启动 Confluent, 否则会出现找不到类的错误。Kafka Connect 的日志(默认路径: /tmp/confluent.xxxx/connect/logs/connect.log)中会输出成功安装的插件,据此可判断插件是否安装成功。 +一定要先安装插件再启动 Confluent, 否则加载插件会失败。 ::: :::tip @@ -125,6 +125,61 @@ Control Center is [UP] 清空数据可执行 `rm -rf /tmp/confluent.106668`。 ::: +### 验证各个组件是否启动成功 + +输入命令: + +``` +confluent local services status +``` + +如果各组件都启动成功,会得到如下输出: + +``` +Connect is [UP] +Control Center is [UP] +Kafka is [UP] +Kafka REST is [UP] +ksqlDB Server is [UP] +Schema Registry is [UP] +ZooKeeper is [UP] +``` + +### 验证插件是否安装成功 + +在 Kafka Connect 组件完全启动后,可用以下命令列出成功加载的插件: + +``` +confluent local services connect plugin list +``` + +如果成功安装,会输出如下: + +```txt {4,9} +Available Connect Plugins: +[ + { + "class": "com.taosdata.kafka.connect.sink.TDengineSinkConnector", + "type": "sink", + "version": "1.0.0" + }, + { + "class": "com.taosdata.kafka.connect.source.TDengineSourceConnector", + "type": "source", + "version": "1.0.0" + }, +...... 
+``` + +如果插件安装失败,请检查 Kafka Connect 的启动日志是否有异常信息,用以下命令输出日志路径: +``` +echo `cat /tmp/confluent.current`/connect/connect.stdout +``` +该命令的输出类似: `/tmp/confluent.104086/connect/connect.stdout`。 + +与日志文件 `connect.stdout` 同一目录,还有一个文件名为: `connect.properties`。在这个文件的末尾,可以看到最终生效的 `plugin.path`, 它是一系列用逗号分割的路径。如果插件安装失败,很可能是因为实际的安装路径不包含在 `plugin.path` 中。 + + ## TDengine Sink Connector 的使用 TDengine Sink Connector 的作用是同步指定 topic 的数据到 TDengine。用户无需提前创建数据库和超级表。可手动指定目标数据库的名字(见配置参数 connection.database), 也可按一定规则生成(见配置参数 connection.database.prefix)。 @@ -144,7 +199,7 @@ vi sink-demo.properties sink-demo.properties 内容如下: ```ini title="sink-demo.properties" -name=tdengine-sink-demo +name=TDengineSinkConnector connector.class=com.taosdata.kafka.connect.sink.TDengineSinkConnector tasks.max=1 topics=meters @@ -153,6 +208,7 @@ connection.user=root connection.password=taosdata connection.database=power db.schemaless=line +data.precision=ns key.converter=org.apache.kafka.connect.storage.StringConverter value.converter=org.apache.kafka.connect.storage.StringConverter ``` @@ -179,6 +235,7 @@ confluent local services connect connector load TDengineSinkConnector --config . "connection.url": "jdbc:TAOS://127.0.0.1:6030", "connection.user": "root", "connector.class": "com.taosdata.kafka.connect.sink.TDengineSinkConnector", + "data.precision": "ns", "db.schemaless": "line", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "tasks.max": "1", @@ -223,10 +280,10 @@ Database changed. taos> select * from meters; ts | current | voltage | phase | groupid | location | =============================================================================================================================================================== - 2022-03-28 09:56:51.249000000 | 11.800000000 | 221.000000000 | 0.280000000 | 2 | California.LosAngeles | - 2022-03-28 09:56:51.250000000 | 13.400000000 | 223.000000000 | 0.290000000 | 2 | California.LosAngeles | - 2022-03-28 09:56:51.249000000 | 10.800000000 | 223.000000000 | 0.290000000 | 3 | California.LosAngeles | - 2022-03-28 09:56:51.250000000 | 11.300000000 | 221.000000000 | 0.350000000 | 3 | California.LosAngeles | + 2022-03-28 09:56:51.249000000 | 11.800000000 | 221.000000000 | 0.280000000 | 2 | California.LosAngeles | + 2022-03-28 09:56:51.250000000 | 13.400000000 | 223.000000000 | 0.290000000 | 2 | California.LosAngeles | + 2022-03-28 09:56:51.249000000 | 10.800000000 | 223.000000000 | 0.290000000 | 3 | California.LosAngeles | + 2022-03-28 09:56:51.250000000 | 11.300000000 | 221.000000000 | 0.350000000 | 3 | California.LosAngeles | Query OK, 4 row(s) in set (0.004208s) ``` @@ -356,21 +413,33 @@ confluent local services connect connector unload TDengineSourceConnector 2. `connection.database.prefix`: 当 connection.database 为 null 时, 目标数据库的前缀。可以包含占位符 '${topic}'。 比如 kafka_${topic}, 对于主题 'orders' 将写入数据库 'kafka_orders'。 默认 null。当为 null 时,目标数据库的名字和主题的名字是一致的。 3. `batch.size`: 分批写入每批记录数。当 Sink Connector 一次接收到的数据大于这个值时将分批写入。 4. `max.retries`: 发生错误时的最大重试次数。默认为 1。 -5. `retry.backoff.ms`: 发送错误时重试的时间间隔。单位毫秒,默认 3000。 -6. `db.schemaless`: 数据格式,必须指定为: line、json、telnet 中的一个。分别代表 InfluxDB 行协议格式、 OpenTSDB JSON 格式、 OpenTSDB Telnet 行协议格式。 +5. `retry.backoff.ms`: 发送错误时重试的时间间隔。单位毫秒,默认为 3000。 +6. `db.schemaless`: 数据格式,可选值为: + 1. line :代表 InfluxDB 行协议格式 + 2. json : 代表 OpenTSDB JSON 格式 + 3. telnet :代表 OpenTSDB Telnet 行协议格式 +7. `data.precision`: 使用 InfluxDB 行协议格式时,时间戳的精度。可选值为: + 1. ms : 表示毫秒 + 2. us : 表示微秒 + 3. ns : 表示纳秒。默认为纳秒。 ### TDengine Source Connector 特有的配置 1. `connection.database`: 源数据库名称,无缺省值。 2. 
`topic.prefix`: 数据导入 kafka 后 topic 名称前缀。 使用 `topic.prefix` + `connection.database` 名称作为完整 topic 名。默认为空字符串 ""。
-3. `timestamp.initial`: 数据同步起始时间。格式为'yyyy-MM-dd HH:mm:ss'。默认 "1970-01-01 00:00:00"。
-4. `poll.interval.ms`: 拉取数据间隔,单位为 ms。默认 1000。
+3. `timestamp.initial`: 数据同步起始时间。格式为'yyyy-MM-dd HH:mm:ss'。默认为 "1970-01-01 00:00:00"。
+4. `poll.interval.ms`: 拉取数据间隔,单位为 ms。默认为 1000。
 5. `fetch.max.rows` : 检索数据库时最大检索条数。 默认为 100。
-6. `out.format`: 数据格式。取值 line 或 json。line 表示 InfluxDB Line 协议格式, json 表示 OpenTSDB JSON 格式。默认 line。
+6. `out.format`: 数据格式。取值 line 或 json。line 表示 InfluxDB Line 协议格式, json 表示 OpenTSDB JSON 格式。默认为 line。
+
+## 其他说明
+
+1. 插件的安装位置可以自定义,请参考官方文档:https://docs.confluent.io/home/connect/self-managed/install.html#install-connector-manually。
+2. 本教程的示例程序使用了 Confluent 平台,但是 TDengine Kafka Connector 本身同样适用于独立安装的 Kafka, 且配置方法相同。关于如何在独立安装的 Kafka 环境使用 Kafka Connect 插件, 请参考官方文档: https://kafka.apache.org/documentation/#connect。
 
 ## 问题反馈
 
-https://github.com/taosdata/kafka-connect-tdengine/issues
+无论遇到任何问题,都欢迎在本项目的 Github 仓库反馈: https://github.com/taosdata/kafka-connect-tdengine/issues。
 
 ## 参考
diff --git a/docs-en/05-get-started/_pkg_install.mdx b/docs-en/05-get-started/_pkg_install.mdx
index af04d2b70bda7575e57cc49a5aa60f19689113e6..cf10497c96ba1d777e45340b0312d97c127b6fcb 100644
--- a/docs-en/05-get-started/_pkg_install.mdx
+++ b/docs-en/05-get-started/_pkg_install.mdx
@@ -12,6 +12,6 @@ Between two major release versions, some beta versions may be delivered for user
 
 For the details please refer to [Install and Uninstall](/operation/pkg-install)。
 
-To see the details of versions, please refer to [Download List](https://www.taosdata.com/all-downloads) and [Release Notes](https://github.com/taosdata/TDengine/releases).
+To see the details of versions, please refer to [Download List](https://tdengine.com/all-downloads) and [Release Notes](https://github.com/taosdata/TDengine/releases).
 
diff --git a/docs-en/12-taos-sql/07-function.md b/docs-en/12-taos-sql/07-function.md
index 1c2961d7b59ffbfb6df7d8488953b2d4a17201e9..3589efe9cdd618110203a8439ba03eaaf315f48c 100644
--- a/docs-en/12-taos-sql/07-function.md
+++ b/docs-en/12-taos-sql/07-function.md
@@ -327,6 +327,32 @@ taos> SELECT HISTOGRAM(voltage, 'log_bin', '{"start": 1, "factor": 3, "count": 3
 {"lower_bin":27, "upper_bin":inf, "count":1} |
 ```
 
+### ELAPSED
+
+```mysql
+SELECT ELAPSED(field_name[, time_unit]) FROM { tb_name | stb_name } [WHERE clause] [INTERVAL(interval [, offset]) [SLIDING sliding]];
+```
+
+**Description**:`elapsed` function can be used to calculate the continuous time length in which there is valid data. If it's used with an `INTERVAL` clause, the returned result is the calculated time length within each time window. If it's used without an `INTERVAL` clause, the returned result is the calculated time length within the specified time range. Please note that the return value of `elapsed` is the number of `time_unit` in the calculated time length.
+
+**Return value type**:Double
+
+**Applicable Column type**:Timestamp
+
+**Applicable versions**:Since version 2.6.0.0
+
+**Applicable tables**: table, STable, outer query in nested query
+
+**Explanations**:
+- The `field_name` parameter can only be the first column of a table, i.e. the timestamp primary key.
+- The minimum value of `time_unit` is the time precision of the database. If `time_unit` is not specified, the time precision of the database is used as the default time unit.
+- It can be used with `INTERVAL` to get the valid time length of each time window. Please note that, except for the first and the last time window, the return value is equal to the window length.
+- `order by asc/desc` has no effect on the result.
+- `group by tbname` must be used together with `elapsed` when it is used against a STable.
+- `group by` must NOT be used together with `elapsed` when it is used against a table or sub table.
+- When used in a nested query, it's only applicable when the inner query outputs an implicit timestamp column as the primary key. For example, `select elapsed(ts) from (select diff(value) from sub1)` is legal usage while `select elapsed(ts) from (select * from sub1)` is not.
+- It can't be used with `leastsquares`, `diff`, `derivative`, `top`, `bottom`, `last_row`, `interp`.
+
 ## Selection Functions
 
 When any select function is used, timestamp column or tag columns including `tbname` can be specified to show that the selected value are from which rows.
diff --git a/docs-en/12-taos-sql/12-keywords.md b/docs-en/12-taos-sql/12-keywords.md
index fa750300b71251e1172dba13f91d05822f9ac1f4..56a82a02a1fada712141f3572b761e0cd18576c6 100644
--- a/docs-en/12-taos-sql/12-keywords.md
+++ b/docs-en/12-taos-sql/12-keywords.md
@@ -46,3 +46,45 @@ There are about 200 keywords reserved by TDengine, they can't be used as the nam
 | CONNECTIONS | HAVING | NOT | SOFFSET | VNODES |
 | CONNS | ID | NOTNULL | STable | WAL |
 | COPY | IF | NOW | STableS | WHERE |
+| _C0 | _QSTART | _QSTOP | _QDURATION | _WSTART |
+| _WSTOP | _WDURATION |
+
+## Explanations
+### TBNAME
+`TBNAME` can be considered as a special tag in a STable; it represents the name of the subtable.
+
+Get the table name and tag values of all subtables in a STable.
+```mysql
+SELECT TBNAME, location FROM meters;
+```
+
+Count the number of subtables in a STable.
+```mysql
+SELECT COUNT(TBNAME) FROM meters;
+```
+
+In the above two query statements, only filters on tags (TAGS) can be used in the WHERE clause. For example:
+```mysql
+taos> SELECT TBNAME, location FROM meters;
+ tbname | location |
+==================================================================
+ d1004 | California.SanFrancisco |
+ d1003 | California.SanFrancisco |
+ d1002 | California.LosAngeles |
+ d1001 | California.LosAngeles |
+Query OK, 4 row(s) in set (0.000881s)
+
+taos> SELECT COUNT(tbname) FROM meters WHERE groupId > 2;
+ count(tbname) |
+========================
+ 2 |
+Query OK, 1 row(s) in set (0.001091s)
+```
+### _QSTART/_QSTOP/_QDURATION
+The start, stop and duration of a query time window (Since version 2.6.0.0).
+
+### _WSTART/_WSTOP/_WDURATION
+The start, stop and duration of an aggregate query by time window, such as interval, session window, or state window (Since version 2.6.0.0).
+
+### _c0
+The first column of a table or STable.
\ No newline at end of file
diff --git a/docs-en/14-reference/03-connector/cpp.mdx b/docs-en/14-reference/03-connector/cpp.mdx
index 4b388d32a9050645e268bb267d16e9a5b8aa4bda..d13a74384ccc99b4200f89cdba98e5ba902e41f8 100644
--- a/docs-en/14-reference/03-connector/cpp.mdx
+++ b/docs-en/14-reference/03-connector/cpp.mdx
@@ -4,7 +4,7 @@ sidebar_label: C/C++
 title: C/C++ Connector
 ---
 
-C/C++ developers can use TDengine's client driver and the C/C++ connector, to develop their applications to connect to TDengine clusters for data writing, querying, and other functions. To use it, you need to include the TDengine header file _taos.h_, which lists the function prototypes of the provided APIs; the application also needs to link to the corresponding dynamic libraries on the platform where it is located.
+C/C++ developers can use TDengine's client driver and the C/C++ connector, to develop their applications to connect to TDengine clusters for data writing, querying, and other functions. To use the C/C++ connector you must include the TDengine header file _taos.h_, which lists the function prototypes of the provided APIs. The application also needs to link to the corresponding dynamic libraries on the platform where it is located. ```c #include @@ -26,7 +26,7 @@ Please refer to [list of supported platforms](/reference/connector#supported-pla ## Supported versions -The version number of the TDengine client driver and the version number of the TDengine server require one-to-one correspondence and recommend using the same version of client driver as what the TDengine server version is. Although a lower version of the client driver is compatible to work with a higher version of the server, if the first three version numbers are the same (i.e., only the fourth version number is different), but it is not recommended. It is strongly discouraged to use a higher version of the client driver to access a lower version of the TDengine server. +The version number of the TDengine client driver and the version number of the TDengine server should be the same. A lower version of the client driver is compatible with a higher version of the server, if the first three version numbers are the same (i.e., only the fourth version number is different). For e.g. if the client version is x.y.z.1 and the server version is x.y.z.2 the client and server are compatible. But in general we do not recommend using a lower client version with a newer server version. It is also strongly discouraged to use a higher version of the client driver to access a lower version of the TDengine server. ## Installation steps @@ -55,7 +55,7 @@ In the above example code, `taos_connect()` establishes a connection to port 603 :::note -- If not specified, when the return value of the API is an integer, _0_ means success, the others are error codes representing the reason for failure, and when the return value is a pointer, _NULL_ means failure. +- If not specified, when the return value of the API is an integer, _0_ means success. All others are error codes representing the reason for failure. When the return value is a pointer, _NULL_ means failure. - All error codes and their corresponding causes are described in the `taoserror.h` file. ::: @@ -140,13 +140,12 @@ The base API is used to do things like create database connections and provide a - `void taos_cleanup()` - Clean up the runtime environment and should be called before the application exits. + Cleans up the runtime environment and should be called before the application exits. - ` int taos_options(TSDB_OPTION option, const void * arg, ...) ` Set client options, currently supports region setting (`TSDB_OPTION_LOCALE`), character set -(`TSDB_OPTION_CHARSET`), time zone -(`TSDB_OPTION_TIMEZONE`), configuration file path (`TSDB_OPTION_CONFIGDIR`) . The region setting, character set, and time zone default to the current settings of the operating system. +(`TSDB_OPTION_CHARSET`), time zone (`TSDB_OPTION_TIMEZONE`), configuration file path (`TSDB_OPTION_CONFIGDIR`). The region setting, character set, and time zone default to the current settings of the operating system. 
- `char *taos_get_client_info()` @@ -159,7 +158,7 @@ The base API is used to do things like create database connections and provide a - host: FQDN of any node in the TDengine cluster - user: user name - pass: password - - db: database name, if the user does not provide, it can also be connected correctly, the user can create a new database through this connection, if the user provides the database name, it means that the database user has already created, the default use of the database + - db: the database name. Even if the user does not provide this, the connection will still work correctly. The user can create a new database through this connection. If the user provides the database name, it means that the database has already been created and the connection can be used for regular operations on the database. - port: the port the taosd program is listening on NULL indicates a failure. The application needs to save the returned parameters for subsequent use. @@ -187,7 +186,7 @@ The APIs described in this subsection are all synchronous interfaces. After bein - `TAOS_RES* taos_query(TAOS *taos, const char *sql)` - Executes an SQL command, either a DQL, DML, or DDL statement. The `taos` parameter is a handle obtained with `taos_connect()`. You can't tell if the result failed by whether the return value is `NULL`, but by parsing the error code in the result set with the `taos_errno()` function. + Executes an SQL command, either a DQL, DML, or DDL statement. The `taos` parameter is a handle obtained with `taos_connect()`. If the return value is `NULL` this does not necessarily indicate a failure. You can get the error code, if any, by parsing the error code in the result set with the `taos_errno()` function. - `int taos_result_precision(TAOS_RES *res)` @@ -231,7 +230,7 @@ typedef struct taosField { - ` void taos_free_result(TAOS_RES *res)` - Frees the query result set and the associated resources. Be sure to call this API to free the resources after the query is completed. Otherwise, it may lead to a memory leak in the application. However, note that the application will crash if you call a function like `taos_consume()` to get the query results after freeing the resources. + Frees the query result set and the associated resources. Be sure to call this API to free the resources after the query is completed. Failing to call this, may lead to a memory leak in the application. However, note that the application will crash if you call a function like `taos_consume()` to get the query results after freeing the resources. - `char *taos_errstr(TAOS_RES *res)` @@ -242,7 +241,7 @@ typedef struct taosField { Get the reason for the last API call failure. The return value is the error code. :::note -TDengine version 2.0 and above recommends that each thread of a database application create a separate connection or a connection pool based on threads. It is not recommended to pass the connection (TAOS\*) structure to different threads for shared use in the application. Queries, writes, etc., issued based on TAOS structures are multi-thread safe, but state quantities such as "USE statement" may interfere between threads. In addition, the C connector can dynamically create new database-oriented connections on demand (this procedure is not visible to the user), and it is recommended that `taos_close()` be called only at the final exit of the program to close the connection. 
+TDengine version 2.0 and above recommends that each thread of a database application create a separate connection or a connection pool based on threads. It is not recommended to pass the connection (TAOS\*) structure to different threads for shared use in the application. Queries, writes, and other operations issued that are based on TAOS structures are multi-thread safe, but state quantities such as the "USE statement" may interfere between threads. In addition, the C connector can dynamically create new database-oriented connections on demand (this procedure is not visible to the user), and it is recommended that `taos_close()` be called only at the final exit of the program to close the connection. ::: @@ -274,12 +273,12 @@ All TDengine's asynchronous APIs use a non-blocking call pattern. Applications c ### Parameter Binding API -In addition to direct calls to `taos_query()` to perform queries, TDengine also provides a set of `bind` APIs that supports parameter binding, similar in style to MySQL, and currently only supports using a question mark `? ` to represent the parameter to be bound. +In addition to direct calls to `taos_query()` to perform queries, TDengine also provides a set of `bind` APIs that supports parameter binding, similar in style to MySQL. TDengine currently only supports using a question mark `? ` to represent the parameter to be bound. -Starting with versions 2.1.1.0 and 2.1.2.0, TDengine has significantly improved the bind APIs to support for data writing (INSERT) scenarios. This avoids the resource consumption of SQL syntax parsing when writing data through the parameter binding interface, thus significantly improving write performance in most cases. A typical operation, in this case, is as follows. +Starting with versions 2.1.1.0 and 2.1.2.0, TDengine has significantly improved the bind APIs to support data writing (INSERT) scenarios. This avoids the resource consumption of SQL syntax parsing when writing data through the parameter binding interface, thus significantly improving write performance in most cases. A typical operation, in this case, is as follows. 1. call `taos_stmt_init()` to create the parameter binding object. -2. call `taos_stmt_prepare()` to parse the INSERT statement. 3. +2. call `taos_stmt_prepare()` to parse the INSERT statement. 3. call `taos_stmt_set_tbname()` to set the table name if it is reserved in the INSERT statement but not the TAGS. 4. call `taos_stmt_set_tbname_tags()` to set the table name and TAGS values if the table name and TAGS are reserved in the INSERT statement (for example, if the INSERT statement takes an automatic table build). 5. call `taos_stmt_bind_param_batch()` to set the value of VALUES in multiple columns, or call `taos_stmt_bind_param()` to set the value of VALUES in a single row. @@ -383,7 +382,7 @@ In addition to writing data using the SQL method or the parameter binding API, w **return value** TAOS_RES structure, application can get error message by using `taos_errstr()` and also error code by using `taos_errno()`. In some cases, the returned TAOS_RES is `NULL`, and it is still possible to call `taos_errno()` to safely get the error code information. - The returned TAOS_RES needs to be freed by the caller. Otherwise, a memory leak will occur. + The returned TAOS_RES needs to be freed by the caller in order to avoid memory leaks. **Description** The protocol type is enumerated and contains the following three formats. 
@@ -416,13 +415,13 @@ The Subscription API currently supports subscribing to one or more tables and co This function is responsible for starting the subscription service, returning the subscription object on success and `NULL` on failure, with the following parameters. - - taos: the database connection that has been established - - restart: if the subscription already exists, whether to restart or continue the previous subscription - - topic: the topic of the subscription (i.e., the name). This parameter is the unique identifier of the subscription - - sql: the query statement of the subscription, this statement can only be _select_ statement, only the original data should be queried, only the data can be queried in time order - - fp: the callback function when the query result is received (the function prototype will be introduced later), only used when called asynchronously. This parameter should be passed `NULL` when called synchronously - - param: additional parameter when calling the callback function, the system API will pass it to the callback function as it is, without any processing - - interval: polling period in milliseconds. The callback function will be called periodically according to this parameter when called asynchronously. not recommended to set this parameter too small To avoid impact on system performance when called synchronously. If the interval between two calls to `taos_consume()` is less than this period, the API will block until the interval exceeds this period. + - taos: the database connection that has been established. + - restart: if the subscription already exists, whether to restart or continue the previous subscription. + - topic: the topic of the subscription (i.e., the name). This parameter is the unique identifier of the subscription. + - sql: the query statement of the subscription which can only be a _select_ statement. Only the original data should be queried, and data can only be queried in temporal order. + - fp: the callback function when the query result is received only used when called asynchronously. This parameter should be passed `NULL` when called synchronously. The function prototype is described below. + - param: additional parameter when calling the callback function. The system API will pass it to the callback function as is, without any processing. + - interval: polling period in milliseconds. The callback function will be called periodically according to this parameter when called asynchronously. The interval should not be too small to avoid impact on system performance when called synchronously. If the interval between two calls to `taos_consume()` is less than this period, the API will block until the interval exceeds this period. - ` typedef void (*TAOS_SUBSCRIBE_CALLBACK)(TAOS_SUB* tsub, TAOS_RES *res, void* param, int code)` diff --git a/docs-en/14-reference/03-connector/csharp.mdx b/docs-en/14-reference/03-connector/csharp.mdx index 1b0748fbd0d769b949d7eeb3fc72463539cc7564..5eb322cf9125fe036349de22ceea5988de46e404 100644 --- a/docs-en/14-reference/03-connector/csharp.mdx +++ b/docs-en/14-reference/03-connector/csharp.mdx @@ -179,7 +179,7 @@ namespace TDengineExample 1. "Unable to establish connection", "Unable to resolve FQDN" - Usually, it cause by the FQDN configuration is incorrect, you can refer to [How to understand TDengine's FQDN (Chinese)](https://www.taosdata.com/blog/2021/07/29/2741.html) to solve it. + Usually, it's caused by an incorrect FQDN configuration. 
Please refer to this section in the [FAQ](https://docs.tdengine.com/2.4/train-faq/faq/#2-how-to-handle-unable-to-establish-connection) to troubleshoot. 2. Unhandled exception. System.DllNotFoundException: Unable to load DLL 'taos' or one of its dependencies: The specified module cannot be found. diff --git a/docs-en/14-reference/03-connector/node.mdx b/docs-en/14-reference/03-connector/node.mdx index 0774c35d626dcde16c1a834677da5aacf07a392e..8f586acde4848af71efcb23358be1f8486cedb8e 100644 --- a/docs-en/14-reference/03-connector/node.mdx +++ b/docs-en/14-reference/03-connector/node.mdx @@ -225,7 +225,7 @@ See [video tutorial](https://www.taosdata.com/blog/2020/11/11/1957.html) for the 2. "Unable to establish connection", "Unable to resolve FQDN" - Usually, root cause is the FQDN is not configured correctly. You can refer to [How to understand TDengine's FQDN (In Chinese)](https://www.taosdata.com/blog/2021/07/29/2741.html). + Usually, the root cause is an incorrect FQDN configuration. You can refer to this section in the [FAQ](https://docs.tdengine.com/2.4/train-faq/faq/#2-how-to-handle-unable-to-establish-connection) to troubleshoot. ## Important Updates diff --git a/docs-en/20-third-party/11-kafka.md b/docs-en/20-third-party/11-kafka.md index 9c78a6645a0578d3b8d494d1fa60831eb88b3c81..6720af8bf81ea2f4fce415a54847453f578ababf 100644 --- a/docs-en/20-third-party/11-kafka.md +++ b/docs-en/20-third-party/11-kafka.md @@ -7,7 +7,7 @@ TDengine Kafka Connector contains two plugins: TDengine Source Connector and TDe ## What is Kafka Connect? -Kafka Connect is a component of Apache Kafka that enables other systems, such as databases, cloud services, file systems, etc., to connect to Kafka easily. Data can flow from other software to Kafka via Kafka Connect and Kafka to other systems via Kafka Connect. Plugins that read data from other software are called Source Connectors, and plugins that write data to other software are called Sink Connectors. Neither Source Connector nor Sink Connector will directly connect to Kafka Broker, and Source Connector transfers data to Kafka Connect. Sink Connector receives data from Kafka Connect. +Kafka Connect is a component of [Apache Kafka](https://kafka.apache.org/) that enables other systems, such as databases, cloud services, file systems, etc., to connect to Kafka easily. Data can flow from other software to Kafka via Kafka Connect and Kafka to other systems via Kafka Connect. Plugins that read data from other software are called Source Connectors, and plugins that write data to other software are called Sink Connectors. Neither Source Connector nor Sink Connector will directly connect to Kafka Broker, and Source Connector transfers data to Kafka Connect. Sink Connector receives data from Kafka Connect. ![TDengine Database Kafka Connector -- Kafka Connect](kafka/Kafka_Connect.webp) @@ -17,7 +17,7 @@ TDengine Source Connector is used to read data from TDengine in real-time and se ## What is Confluent? -Confluent adds many extensions to Kafka. include: +[Confluent](https://www.confluent.io/) adds many extensions to Kafka. include: 1. Schema Registry 2. 
REST Proxy
@@ -79,10 +79,10 @@ Development: false
 git clone https://github.com:taosdata/kafka-connect-tdengine.git
 cd kafka-connect-tdengine
 mvn clean package
-unzip -d $CONFLUENT_HOME/share/confluent-hub-components/ target/components/packages/taosdata-kafka-connect-tdengine-0.1.0.zip
+unzip -d $CONFLUENT_HOME/share/java/ target/components/packages/taosdata-kafka-connect-tdengine-*.zip
 ```
 
-The above script first clones the project source code and then compiles and packages it with Maven. After the package is complete, the zip package of the plugin is generated in the `target/components/packages/` directory. Unzip this zip package to the path where the plugin is installed. The path to install the plugin is in the configuration file `$CONFLUENT_HOME/etc/kafka/connect-standalone.properties`. The default path is `$CONFLUENT_HOME/share/confluent-hub-components/`.
+The above script first clones the project source code and then compiles and packages it with Maven. After the package is complete, the zip package of the plugin is generated in the `target/components/packages/` directory. Unzip this zip package to the plugin installation path. We used `$CONFLUENT_HOME/share/java/` above because it is a built-in plugin path.
 
 ### Install with confluent-hub
 
@@ -96,7 +96,7 @@ confluent local services start
 ```
 
 :::note
-Be sure to install the plugin before starting Confluent. Otherwise, there will be a class not found error. The log of Kafka Connect (default path: /tmp/confluent.xxxx/connect/logs/connect.log) will output the successfully installed plugin, which users can use to determine whether the plugin is installed successfully.
+Be sure to install the plugin before starting Confluent. Otherwise, Kafka Connect will fail to discover the plugins.
 :::
 
 :::tip
@@ -123,6 +123,59 @@ Control Center is [UP]
 To clear data, execute `rm -rf /tmp/confluent.106668`.
 :::
 
+### Check Confluent Services Status
+
+Use the command below to check the status of all services:
+
+```
+confluent local services status
+```
+
+The expected output is:
+```
+Connect is [UP]
+Control Center is [UP]
+Kafka is [UP]
+Kafka REST is [UP]
+ksqlDB Server is [UP]
+Schema Registry is [UP]
+ZooKeeper is [UP]
+```
+
+### Check Successfully Loaded Plugin
+
+After Kafka Connect has fully started, you can use the command below to check whether the plugins are installed successfully:
+```
+confluent local services connect plugin list
+```
+
+The output should contain `TDengineSinkConnector` and `TDengineSourceConnector`, as below:
+
+```
+Available Connect Plugins:
+[
+  {
+    "class": "com.taosdata.kafka.connect.sink.TDengineSinkConnector",
+    "type": "sink",
+    "version": "1.0.0"
+  },
+  {
+    "class": "com.taosdata.kafka.connect.source.TDengineSourceConnector",
+    "type": "source",
+    "version": "1.0.0"
+  },
+......
+```
+
+If not, please check the log file of Kafka Connect. To view the log file path, please execute:
+
+```
+echo `cat /tmp/confluent.current`/connect/connect.stdout
+```
+It should produce a path like: `/tmp/confluent.104086/connect/connect.stdout`.
+
+Besides the log file `connect.stdout`, there is a file named `connect.properties` in the same directory. At the end of this file you can see the effective `plugin.path`, which is a series of paths joined by commas. If Kafka Connect did not find the plugins, it is probably because the actual installation path is not included in `plugin.path`.
+
 ## The use of TDengine Sink Connector
 
 The role of the TDengine Sink Connector is to synchronize the data of the specified topic to TDengine. Users do not need to create databases and super tables in advance. 
The name of the target database can be specified manually (see the configuration parameter connection.database), or it can be generated according to specific rules (see the configuration parameter connection.database.prefix). @@ -142,7 +195,7 @@ vi sink-demo.properties sink-demo.properties' content is following: ```ini title="sink-demo.properties" -name=tdengine-sink-demo +name=TDengineSinkConnector connector.class=com.taosdata.kafka.connect.sink.TDengineSinkConnector tasks.max=1 topics=meters @@ -151,6 +204,7 @@ connection.user=root connection.password=taosdata connection.database=power db.schemaless=line +data.precision=ns key.converter=org.apache.kafka.connect.storage.StringConverter value.converter=org.apache.kafka.connect.storage.StringConverter ``` @@ -177,6 +231,7 @@ If the above command is executed successfully, the output is as follows: "connection.url": "jdbc:TAOS://127.0.0.1:6030", "connection.user": "root", "connector.class": "com.taosdata.kafka.connect.sink.TDengineSinkConnector", + "data.precision": "ns", "db.schemaless": "line", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "tasks.max": "1", @@ -221,10 +276,10 @@ Database changed. taos> select * from meters; ts | current | voltage | phase | groupid | location | =============================================================================================================================================================== - 2022-03-28 09:56:51.249000000 | 11.800000000 | 221.000000000 | 0.280000000 | 2 | California.LoSangeles | - 2022-03-28 09:56:51.250000000 | 13.400000000 | 223.000000000 | 0.290000000 | 2 | California.LoSangeles | - 2022-03-28 09:56:51.249000000 | 10.800000000 | 223.000000000 | 0.290000000 | 3 | California.LoSangeles | - 2022-03-28 09:56:51.250000000 | 11.300000000 | 221.000000000 | 0.350000000 | 3 | California.LoSangeles | + 2022-03-28 09:56:51.249000000 | 11.800000000 | 221.000000000 | 0.280000000 | 2 | California.LosAngeles | + 2022-03-28 09:56:51.250000000 | 13.400000000 | 223.000000000 | 0.290000000 | 2 | California.LosAngeles | + 2022-03-28 09:56:51.249000000 | 10.800000000 | 223.000000000 | 0.290000000 | 3 | California.LosAngeles | + 2022-03-28 09:56:51.250000000 | 11.300000000 | 221.000000000 | 0.350000000 | 3 | California.LosAngeles | Query OK, 4 row(s) in set (0.004208s) ``` @@ -356,6 +411,7 @@ The following configuration items apply to TDengine Sink Connector and TDengine 4. `max.retries`: The maximum number of retries when an error occurs. Defaults to 1. 5. `retry.backoff.ms`: The time interval for retry when sending an error. The unit is milliseconds. The default is 3000. 6. `db.schemaless`: Data format, could be one of `line`, `json`, and `telnet`. Represent InfluxDB line protocol format, OpenTSDB JSON format, and OpenTSDB Telnet line protocol format. +7. `data.precision`: The time precision when use InfluxDB line protocol format data, could be one of `ms`, `us` and `ns`. The default is `ns`. ### TDengine Source Connector specific configuration @@ -366,7 +422,13 @@ The following configuration items apply to TDengine Sink Connector and TDengine 5. `fetch.max.rows`: The maximum number of rows retrieved when retrieving the database. Default is 100. 6. `out.format`: The data format. The value could be line or json. The line represents the InfluxDB Line protocol format, and json represents the OpenTSDB JSON format. Default is `line`. -## feedback + +## Other notes + +1. 
To install plugin to a customized location, refer to https://docs.confluent.io/home/connect/self-managed/install.html#install-connector-manually. +2. To use Kafka Connect without confluent, refer to https://kafka.apache.org/documentation/#connect. + +## Feedback https://github.com/taosdata/kafka-connect-tdengine/issues diff --git a/docs-en/25-application/03-immigrate.md b/docs-en/25-application/03-immigrate.md index 69166bf78b66a23af35af726f2e5c477195a3595..4d47aec1d76014ba63f6be91004abcc3934769f7 100644 --- a/docs-en/25-application/03-immigrate.md +++ b/docs-en/25-application/03-immigrate.md @@ -3,10 +3,9 @@ sidebar_label: OpenTSDB Migration to TDengine title: Best Practices for Migrating OpenTSDB Applications to TDengine --- -As a distributed, scalable, HBase-based distributed time-series database software, thanks to its first-mover advantage, OpenTSDB has been introduced and widely used in DevOps by people. However, using new technologies like cloud computing, microservices, and containerization technology with rapid development. Enterprise-level services are becoming more and more diverse. The architecture is becoming more complex. +As a distributed, scalable, distributed time-series database platform based on HBase, and thanks to its first-mover advantage, OpenTSDB is widely used for monitoring in DevOps. However, as new technologies like cloud computing, microservices, and containerization technology has developed rapidly, Enterprise-level services are becoming more and more diverse and the architecture is becoming more complex. -From this situation, it increasingly plagues to use of OpenTSDB as a DevOps backend storage for monitoring by performance issues and delayed feature upgrades. The resulting increase in application deployment costs and reduced operational efficiency. -These problems are becoming increasingly severe as the system scales up. +As a result, as a DevOps backend for monitoring, OpenTSDB is plagued by performance issues and delayed feature upgrades. This has resulted in increased application deployment costs and reduced operational efficiency. These problems become increasingly severe as the system tries to scale up. To meet the fast-growing IoT big data market and technical needs, TAOSData developed an innovative big-data processing product, **TDengine**. @@ -14,14 +13,14 @@ After learning the advantages of many traditional relational databases and NoSQL Compared with OpenTSDB, TDengine has the following distinctive features. -- Performance of data writing and querying far exceeds that of OpenTSDB. -- Efficient compression mechanism for time-series data, which compresses less than 1/5 of the storage space on disk. -- The installation and deployment are straightforward. A single installation package can complete the installation and deployment and does not rely on other third-party software. The entire installation and deployment process in a few seconds; -- The built-in functions cover all of OpenTSDB's query functions. And support more time-series data query functions, scalar functions, and aggregation functions. And support advanced query functions such as multiple time-window aggregations, join query, expression operation, multiple group aggregation, user-defined sorting, and user-defined functions. Adopting SQL-like syntax rules is more straightforward and has no learning cost. +- Data writing and querying performance far exceeds that of OpenTSDB. 
+- Efficient compression mechanism for time-series data, which compresses to less than 1/5 of the storage space, on disk. +- The installation and deployment are straightforward. A single installation package can complete the installation and deployment and does not rely on other third-party software. The entire installation and deployment process takes a few seconds. +- The built-in functions cover all of OpenTSDB's query functions and TDengine supports more time-series data query functions, scalar functions, and aggregation functions. TDengine also supports advanced query functions such as multiple time-window aggregations, join query, expression operation, multiple group aggregation, user-defined sorting, and user-defined functions. With a SQL-like query language, querying is more straightforward and has no learning cost. - Supports up to 128 tags, with a total tag length of 16 KB. - In addition to the REST interface, it also provides interfaces to Java, Python, C, Rust, Go, C# and other languages. Its supports a variety of enterprise-class standard connector protocols such as JDBC. -If we migrate the applications originally running on OpenTSDB to TDengine, we will effectively reduce the compute and storage resource consumption and the number of deployed servers. And will also significantly reduce the operation and maintenance costs, making operation and maintenance management more straightforward and more accessible, and considerably reducing the total cost of ownership. Like OpenTSDB, TDengine has also been open-sourced, including the stand-alone version and the cluster version source code. So there is no need to be concerned about the vendor-lock problem. +Migrating applications originally running on OpenTSDB to TDengine, effectively reduces compute and storage resource consumption and the number of deployed servers. It also significantly reduces operation and maintenance costs, makes operation and maintenance management more straightforward and more accessible, and considerably reduces the total cost of ownership. Like OpenTSDB, TDengine has also been open-sourced. Both the stand-alone version and the cluster version are open-sourced and there is no need to be concerned about the vendor-lock problem. We will explain how to migrate OpenTSDB applications to TDengine quickly, securely, and reliably without coding, using the most typical DevOps scenarios. Subsequent chapters will go into more depth to facilitate migration for non-DevOps systems. @@ -34,7 +33,7 @@ The following figure (Figure 1) shows the system's overall architecture for a ty **Figure 1. Typical architecture in a DevOps scenario** ![TDengine Database IT-DevOps-Solutions-Immigrate-OpenTSDB-Arch](./IT-DevOps-Solutions-Immigrate-OpenTSDB-Arch.webp "Figure 1. Typical architecture in a DevOps scenario") -In this application scenario, there are Agent tools deployed in the application environment to collect machine metrics, network metrics, and application metrics. Data collectors to aggregate information collected by agents, systems for persistent data storage and management, and tools for monitoring data visualization (e.g., Grafana, etc.). +In this application scenario, there are Agent tools deployed in the application environment to collect machine metrics, network metrics, and application metrics. There are also data collectors to aggregate information collected by agents, systems for persistent data storage and management, and tools for data visualization (e.g., Grafana, etc.). 
The agents deployed in the application nodes are responsible for providing operational metrics from different sources to collectd/Statsd. And collectd/StatsD is accountable for pushing the aggregated data to the OpenTSDB cluster system and then visualizing the data using the visualization kanban board software, Grafana. @@ -44,15 +43,15 @@ The agents deployed in the application nodes are responsible for providing opera First of all, please install TDengine. Download the latest stable version of TDengine from the official website and install it. For help with using various installation packages, please refer to the blog ["Installation and Uninstallation of TDengine Multiple Installation Packages"](https://www.taosdata.com/blog/2019/08/09/566.html). -Note that once the installation is complete, do not start the `taosd` service immediately, but after properly configuring the parameters. +Note that once the installation is complete, do not start the `taosd` service before properly configuring the parameters. - **Adjusting the data collector configuration** TDengine version 2.4 and later version includes `taosAdapter`. taosAdapter is a stateless, rapidly elastic, and scalable component. taosAdapter supports Influxdb's Line Protocol and OpenTSDB's telnet/JSON writing protocol specification, providing rich data access capabilities, effectively saving user migration costs and reducing the difficulty of user migration. -Users can flexibly deploy taosAdapter instances according to their requirements to rapidly improve the throughput of data writes in conjunction with the needs of scenarios and provide guarantees for data writes in different application scenarios. +Users can flexibly deploy taosAdapter instances, based on their requirements, to improve data writing throughput and provide guarantees for data writes in different application scenarios. -Through taosAdapter, users can directly push the data collected by `collectd` or `StatsD` to TDengine to achieve seamless migration of application scenarios, which is very easy and convenient. taosAdapter also supports Telegraf, Icinga, TCollector, and node_exporter data. For more details, please refer to [taosAdapter](/reference/taosadapter/). +Through taosAdapter, users can directly write the data collected by `collectd` or `StatsD` to TDengine to achieve easy, convenient and seamless migration in application scenarios. taosAdapter also supports Telegraf, Icinga, TCollector, and node_exporter data. For more details, please refer to [taosAdapter](/reference/taosadapter/). If using collectd, modify the configuration file in its default location `/etc/collectd/collectd.conf` to point to the IP address and port of the node where to deploy taosAdapter. For example, assuming the taosAdapter IP address is 192.168.1.130 and port 6046, configure it as follows. @@ -66,56 +65,55 @@ LoadPlugin write_tsdb ``` -You can use collectd and push the data to taosAdapter utilizing the push to OpenTSDB plugin. taosAdapter will call the API to write the data to TDengine, thus completing the writing of the data. If you are using StatsD, adjust the profile information accordingly. +You can use collectd and push the data to taosAdapter utilizing the write_tsdb plugin. taosAdapter will call the API to write the data to TDengine. If you are using StatsD, adjust the profile information accordingly. - **Tuning the Dashboard system** -After writing the data to TDengine properly, you can adapt Grafana to visualize the data written to TDengine. 
To obtain and use the Grafana plugin provided by TDengine, please refer to [Links to other tools](/third-party/grafana). +After writing the data to TDengine, you can configure Grafana to visualize the data written to TDengine. To obtain and use the Grafana plugin provided by TDengine, please refer to [Links to other tools](/third-party/grafana). TDengine provides two sets of Dashboard templates by default, and users only need to import the templates from the Grafana directory into Grafana to activate their use. **Importing Grafana Templates** Figure 2. ![TDengine Database IT-DevOps-Solutions-Immigrate-OpenTSDB-Dashboard](./IT-DevOps-Solutions-Immigrate-OpenTSDB-Dashboard.webp "Figure 2. Importing a Grafana Template") -After the above steps, you completed the migration to replace OpenTSDB with TDengine. You can see that the whole process is straightforward, there is no need to write any code, and only some configuration files need to be adjusted to meet the migration work. +With the above steps completed, you have finished replacing OpenTSDB with TDengine. You can see that the whole process is straightforward, there is no need to write any code, and only some configuration files need to be changed. ### 3. Post-migration architecture -After completing the migration, the figure below (Figure 3) shows the system's overall architecture. The whole process of the acquisition side, the data writing, and the monitoring and presentation side are all kept stable, except for a few configuration adjustments, which do not involve any critical changes or alterations. OpenTSDB to TDengine migration action, using TDengine more powerful processing power and query performance. +After completing the migration, the figure below (Figure 3) shows the system's overall architecture. The whole process of the acquisition side, the data writing, and the monitoring and presentation side are all kept stable. There are a few configuration adjustments, which do not involve any critical changes or alterations. Migrating to TDengine from OpenTSDB leads to powerful processing power and query performance. -In most DevOps scenarios, if you have a small OpenTSDB cluster (3 or fewer nodes) for providing the storage layer of DevOps and rely on OpenTSDB to give a data persistence layer and query capabilities, you can safely replace OpenTSDB with TDengine. TDengine will save more compute and storage resources. With the same compute resource allocation, a single TDengine can meet the service capacity provided by 3 to 5 OpenTSDB nodes. If the scale is more prominent, then TDengine clustering is required. - -Suppose your application is particularly complex, or the application domain is not a DevOps scenario. You can continue reading subsequent chapters for a more comprehensive and in-depth look at the advanced topics of migrating an OpenTSDB application to TDengine. +In most DevOps scenarios, if you have a small OpenTSDB cluster (3 or fewer nodes) which provides storage and data persistence layer in addition to query capability, you can safely replace OpenTSDB with TDengine. TDengine will save compute and storage resources. With the same compute resource allocation, a single TDengine can meet the service capacity provided by 3 to 5 OpenTSDB nodes. TDengine clustering may be required depending on the scale of the application. **Figure 3. System architecture after migration** ![TDengine Database IT-DevOps-Solutions-Immigrate-TDengine-Arch](./IT-DevOps-Solutions-Immigrate-TDengine-Arch.webp "Figure 3. 
System architecture after migration completion") +The following chapters provide a more comprehensive and in-depth look at the advanced topics of migrating an OpenTSDB application to TDengine. This will be useful if your application is particularly complex and is not a DevOps application. + ## Migration evaluation and strategy for other scenarios ### 1. Differences between TDengine and OpenTSDB This chapter describes the differences between OpenTSDB and TDengine at the system functionality level. After reading this chapter, you can fully evaluate whether you can migrate some complex OpenTSDB-based applications to TDengine, and what you should pay attention to after migration. -TDengine currently only supports Grafana for visual kanban rendering, so if your application uses front-end kanban boards other than Grafana (e.g., [TSDash](https://github.com/facebook/tsdash), [Status Wolf](https://github.com/box/StatusWolf), etc.). You cannot directly migrate those front-end kanbans to TDengine, and the front-end kanban will need to be ported to Grafana to work correctly. +TDengine currently only supports Grafana for visual kanban rendering, so if your application uses front-end kanban boards other than Grafana (e.g., [TSDash](https://github.com/facebook/tsdash), [Status Wolf](https://github.com/box/StatusWolf), etc.) you cannot directly migrate those front-end kanbans to TDengine. The front-end kanban will need to be ported to Grafana to work correctly. -TDengine version 2.3.0.x only supports collectd and StatsD as data collection aggregation software but will provide more data collection aggregation software in the future. If you use other data aggregators on the collection side, your application needs to be ported to these two data aggregation systems to write data correctly. +TDengine version 2.3.0.x only supports collectd and StatsD as data collection and aggregation software but future versions will provide support for more data collection and aggregation software in the future. If you use other data aggregators on the collection side, your application needs to be ported to these two data aggregation systems to write data correctly. In addition to the two data aggregator software protocols mentioned above, TDengine also supports writing data directly via InfluxDB's line protocol and OpenTSDB's data writing protocol, JSON format. You can rewrite the logic on the data push side to write data using the line protocols supported by TDengine. -In addition, if your application uses the following features of OpenTSDB, you need to understand the following considerations before migrating your application to TDengine. +In addition, if your application uses the following features of OpenTSDB, you need to take into account the following considerations before migrating your application to TDengine. 1. `/api/stats`: If your application uses this feature to monitor the service status of OpenTSDB, and you have built the relevant logic to link the processing in your application, then this part of the status reading and fetching logic needs to be re-adapted to TDengine. TDengine provides a new mechanism for handling cluster state monitoring to meet the monitoring and maintenance needs of your application. -2. 
`/api/tree`: If you rely on this feature of OpenTSDB for the hierarchical organization and maintenance of timelines, you cannot migrate it directly to TDengine, which uses a database -> super table -> sub-table hierarchy to organize and maintain timelines, with all timelines belonging to the same super table in the same system hierarchy, but it is possible to simulate a logical multi-level structure of the application through the unique construction of different tag values. -3. `Rollup And PreAggregates`: The use of Rollup and PreAggregates requires the application to decide where to access the Rollup results and, in some scenarios, to access the actual results. The opacity of this structure makes the application processing logic extraordinarily complex and not portable at all. We think this strategy is a compromise when the time-series database does not. -TDengine does not support automatic downsampling of multiple timelines and preaggregation (for a range of periods) for the time being. Still, thanks to its high-performance query processing logic can provide very high-performance query responses without relying on Rollup and preaggregation (for a range of periods), making your application query processing logic much more straightforward. -The logic is much simpler. -4. `Rate`: TDengine provides two functions to calculate the rate of change of values, namely `Derivative` (the result is consistent with the Derivative behavior of InfluxDB) and `IRate` (the result is compatible with the IRate function in Prometheus). However, the results of these two functions are slightly different from Rate, but the functions are more powerful overall. In addition, TDengine supports all the calculation functions provided by OpenTSDB, and TDengine's query functions are much more potent than those supported by OpenTSDB, which can significantly simplify the processing logic of your application. +2. `/api/tree`: If you rely on this feature of OpenTSDB for the hierarchical organization and maintenance of timelines, you cannot migrate it directly to TDengine, which uses a database -> super table -> sub-table hierarchy to organize and maintain timelines, with all timelines belonging to the same super table in the same system hierarchy. But it is possible to simulate a logical multi-level structure of the application through the unique construction of different tag values. +3. `Rollup And PreAggregates`: The use of Rollup and PreAggregates requires the application to decide where to access the Rollup results and, in some scenarios, to access the actual results. The opacity of this structure makes the application processing logic extraordinarily complex and not portable at all. +While TDengine does not currently support automatic downsampling of multiple timelines and preaggregation (for a range of periods), thanks to its high-performance query processing logic, it can provide very high-performance query responses without relying on Rollup and preaggregation (for a range of periods). This makes your application query processing logic straightforward and simple. +4. `Rate`: TDengine provides two functions to calculate the rate of change of values, namely `Derivative` (the result is consistent with the Derivative behavior of InfluxDB) and `IRate` (the result is compatible with the IRate function in Prometheus). However, the results of these two functions are slightly different from that of Rate. But the TDengine functions are more powerful. 
In addition, TDengine supports all the calculation functions provided by OpenTSDB. TDengine's query functions are much more powerful than those supported by OpenTSDB, which can significantly simplify the processing logic of your application. -Through the above introduction, I believe you should be able to understand the changes brought about by the migration of OpenTSDB to TDengine. And this information will also help you correctly determine whether you would migrate your application to TDengine to experience the powerful and convenient time-series data processing capability provided by TDengine. +With the above introduction, we believe you should be able to understand the changes brought about by the migration of OpenTSDB to TDengine. And this information will also help you correctly determine whether you should migrate your application to TDengine to experience the powerful and convenient time-series data processing capability provided by TDengine. ### 2. Migration strategy suggestion -First, the OpenTSDB-based system migration involves data schema design, system scale estimation, and data write end transformation, data streaming, and application adaptation; after that, the two systems will run in parallel for a while and then migrate the historical data to TDengine. Of course, if your application has some functions that strongly depend on the above OpenTSDB features and you do not want to stop using them, you can migrate the historical data to TDengine. -You can consider keeping the original OpenTSDB system running while starting TDengine to provide the primary services. +OpenTSDB-based system migration involves data schema design, system scale estimation, data write transformation, data streaming, and application changes. The two systems should run in parallel for a while and then the historical data should be migrated to TDengine if your application has some functions that strongly depend on the above OpenTSDB features and you do not want to stop using them. +You can also consider keeping the original OpenTSDB system running while using TDengine to provide the primary services. ## Data model design @@ -129,16 +127,19 @@ Let us now assume a DevOps scenario where we use collectd to collect the underly | 2 | swap | value | double | host | swap_type | swap_type_instance | source | n/a | | 3 | disk | value | double | host | disk_point | disk_instance | disk_type | source | -TDengine requires the data stored to have a data schema, i.e., you need to create a super table and specify the schema of the super table before writing the data. For data schema creation, you have two ways to do this: 1) Take advantage of TDengine's native data writing support for OpenTSDB by calling the TDengine API to write (text line or JSON format) -and automate the creation of single-value models. This approach does not require significant adjustments to the data writing application, nor does it require converting the written data format. +TDengine requires the data stored to have a data schema, i.e., you need to create a super table and specify the schema of the super table before writing the data. For data schema creation, you have two ways to do this: +1) Take advantage of TDengine's native data writing support for OpenTSDB by calling the TDengine API to write (text line or JSON format) and automate the creation of single-value models. This approach does not require significant adjustments to the data writing application, nor does it require converting the written data format. 
At the C level, TDengine provides the `taos_schemaless_insert()` function to write data in OpenTSDB format directly (in early version this function was named `taos_insert_lines()`). Please refer to the sample code `schemaless.c` in the installation package directory as reference. -(2) based on a complete understanding of TDengine's data model, to establish the mapping relationship between OpenTSDB and TDengine's data model adjustment manually. Considering that OpenTSDB is a single-value mapping model, recommended using the single-value model in TDengine. TDengine can support both multi-value and single-value models. +(2) Based on a thorough understanding of TDengine's data model, establish a mapping between OpenTSDB and TDengine's data model. Considering that OpenTSDB is a single-value mapping model, we recommended using the single-value model in TDengine for simplicity. But keep in mind that TDengine supports both multi-value and single-value models. - **Single-valued model**. -The steps are as follows: use the name of the metrics as the name of the TDengine super table, which build with two basic data columns - timestamp and value, and the label of the super table is equivalent to the label information of the metrics, and the number of labels is equal to the number of labels of the metrics. The names of sub-tables are named with fixed rules: `metric + '_' + tags1_value + '_' + tag2_value + '_' + tag3_value ...` as the sub-table name. +The steps are as follows: +- Use the name of the metrics as the name of the TDengine super table +- Build with two basic data columns - timestamp and value. The label of the super table is equivalent to the label information of the metrics, and the number of labels is equal to the number of labels of the metrics. +- The names of sub-tables are named with fixed rules: `metric + '_' + tags1_value + '_' + tag2_value + '_' + tag3_value ...` as the sub-table name. Create 3 super tables in TDengine. @@ -158,13 +159,13 @@ The final system will have about 340 sub-tables and three super-tables. Note tha - **Multi-value model** -Suppose you want to take advantage of TDengine's multi-value modeling capabilities. In that case, you need first to meet the requirements that different collection quantities have the same collection frequency and can reach the **data write side simultaneously via a message queue**, thus ensuring writing multiple metrics at once using SQL statements. The metric's name is used as the name of the super table to create a multi-column model of data that has the same collection frequency and can arrive simultaneously. The names of the sub-tables are named using a fixed rule. Each of the above metrics contains only one measurement value, so converting it into a multi-value model is impossible. +Ideally you should take advantage of TDengine's multi-value modeling capabilities. In that case, you first need to meet the requirement that different collection quantities have the same collection frequency and can reach the **data write side simultaneously via a message queue**, thus ensuring writing multiple metrics at once, using SQL statements. The metric's name is used as the name of the super table to create a multi-column model of data that has the same collection frequency and can arrive simultaneously. The sub-tables are named using a fixed rule. Each of the above metrics contains only one measurement value, so converting it into a multi-value model is impossible. 
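Since the single-value model is the one that applies here, the following sketch illustrates the mapping rule above with the TDengine Python connector: one super table per metric with `ts` and `val` columns, the metric's labels as tags, and the sub-table name built as `metric + '_' + tag1_value + '_' + tag2_value + ...`. The column and tag types, the database name `db`, and the connection parameters are assumptions made only for this illustration.

```python
import taos  # TDengine Python connector

# Connect and create a database for the example (connection parameters are assumed).
conn = taos.connect(host="127.0.0.1", user="root", password="taosdata")
cursor = conn.cursor()
cursor.execute("CREATE DATABASE IF NOT EXISTS db")
cursor.execute("USE db")

# One super table per metric: two data columns (ts, val) plus the metric's
# labels as tags. The tag types below are assumptions for this sketch.
cursor.execute(
    "CREATE TABLE IF NOT EXISTS swap (ts TIMESTAMP, val DOUBLE) "
    "TAGS (host BINARY(64), swap_type BINARY(32), swap_type_instance BINARY(32), source BINARY(32))"
)

def subtable_name(metric, tag_values):
    # Sub-table naming rule: metric + '_' + tag1_value + '_' + tag2_value + ...
    return "_".join([metric] + list(tag_values))

tags = ("host01", "swap", "used", "collectd")
tb = subtable_name("swap", tags)

# Auto-create the sub-table on first insert and write one sample value.
cursor.execute(
    "INSERT INTO {} USING swap TAGS ('{}', '{}', '{}', '{}') VALUES (NOW, 1024.0)".format(tb, *tags)
)
conn.close()
```

The same pattern applies to the `memory` and `disk` metrics; only the tag list changes.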
## Data triage and application adaptation -Subscribe data from the message queue and start the adapted writer to write the data. +Subscribe to the message queue and start writing data to TDengine. -After writing the data starts for a while, you can use SQL statements to check whether the amount of data written meets the expected writing requirements. Use the following SQL statement to count the amount of data. +After data has been written for a while, you can use SQL statements to check whether the amount of data written meets the expected writing requirements. Use the following SQL statement to count the amount of data. ```sql select count(*) from memory @@ -184,7 +185,7 @@ To facilitate historical data migration, we provide a plug-in for the data synch For the specific usage of DataX and how to use DataX to write data to TDengine, please refer to [DataX-based TDengine Data Migration Tool](https://www.taosdata.com/blog/2021/10/26/3156.html). -After migrating via DataX, we found that we can significantly improve the efficiency of migrating historical data by starting multiple processes and migrating numerous metrics simultaneously. The following are some records of the migration process. I wish to use these for application migration as a reference. +After migrating via DataX, we found that we can significantly improve the efficiency of migrating historical data by starting multiple processes and migrating numerous metrics simultaneously. The following are some records of the migration process. We provide these as a reference for application migration. | Number of datax instances (number of concurrent processes) | Migration record speed (pieces/second) | | ----------------------------- | ------------------- -- | @@ -202,13 +203,13 @@ Suppose you need to use the multi-value model for data writing. In that case, yo Manual migration of data requires attention to the following two issues: -1) When storing the exported data on the disk, the disk needs to have enough storage space to accommodate the exported data files fully. Adopting the partial import mode to avoid the shortage of disk file storage after the total amount of data is exported. Preferentially export the timelines belonging to the same super table. Then the exported data files are imported into the TDengine system. +1) When storing the exported data on the disk, the disk needs to have enough storage space to accommodate the exported data files fully. To avoid running out of disk space, you can adopt a partial import mode in which you preferentially export the timelines belonging to the same super table and then only those files are imported into TDengine. -2) Under the full load of the system, if there are enough remaining computing and IO resources, establish a multi-threaded importing to maximize the efficiency of data migration. Considering the vast load that data parsing brings to the CPU, it is necessary to control the maximum number of parallel tasks to avoid the overall overload of the system triggered by importing historical data. +2) Under the full load of the system, if there are enough remaining computing and IO resources, establish a multi-threaded import to maximize the efficiency of data migration. Considering the vast load that data parsing brings to the CPU, it is necessary to control the maximum number of parallel tasks to avoid overloading the system when importing historical data. 
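The bounded-parallelism import described in item 2) above can be sketched as follows. This is only an illustration, assuming the exported data has already been converted into one file of TDengine INSERT statements per super table (one statement per line); the directory layout, connection parameters, and worker count are placeholders, not part of the official tooling.

```python
import glob
from concurrent.futures import ThreadPoolExecutor

import taos  # TDengine Python connector

MAX_WORKERS = 4  # cap the number of parallel import tasks so parsing does not overload the CPU


def import_file(path):
    """Replay every INSERT statement contained in one exported file."""
    conn = taos.connect(host="127.0.0.1", user="root", password="taosdata", database="db")
    cursor = conn.cursor()
    statements = 0
    with open(path) as f:
        for line in f:
            line = line.strip()
            if line:
                cursor.execute(line)
                statements += 1
    conn.close()
    return path, statements


if __name__ == "__main__":
    # One exported file per super table, e.g. memory.sql, swap.sql, disk.sql (assumed layout).
    files = sorted(glob.glob("/data/export/*.sql"))
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool:
        for path, statements in pool.map(import_file, files):
            print(f"{path}: imported {statements} statements")
```

Raising `MAX_WORKERS` increases throughput only while spare CPU and IO capacity remain, which is exactly the trade-off described above.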
Due to the ease of operation of TDengine itself, there is no need to perform index maintenance and data format change processing in the entire process. The whole process only needs to be executed sequentially. -When wholly importing the historical data into TDengine, the two systems run simultaneously and then switch the query request to TDengine to achieve seamless application switching. +While importing historical data into TDengine, the two systems should run simultaneously. Once all the data is migrated, switch the query request to TDengine to achieve seamless application switching. ## Appendix 1: OpenTSDB query function correspondence table @@ -222,12 +223,12 @@ Example: SELECT avg(val) FROM (SELECT first(val) FROM super_table WHERE ts >= startTime and ts <= endTime INTERVAL(20s) Fill(linear)) INTERVAL(20s) ``` -Remark: +Remarks: 1. The value in Interval needs to be the same as the interval value in the outer query. -2. The interpolation processing in TDengine needs to use subqueries to assist in the completion. As shown above, it is enough to specify the interpolation type in the inner query. Since the interpolation of the values ​​in OpenTSDB uses linear interpolation, use fill( in the interpolation clause. linear) to declare the interpolation type. The following functions with the exact interpolation calculation requirements are processed by this method. -3. The parameter 20s in Interval indicates that the inner query will generate results according to a time window of 20 seconds. In an actual query, it needs to adjust to the time interval between different records. It ensures that producing interpolation results equivalent to the original data. -4. Due to the particular interpolation strategy and mechanism of OpenTSDB, the method of the first interpolation and then calculation in the aggregate query (Aggregate) makes the calculation results impossible to be utterly consistent with TDengine. But in the case of downsampling (Downsample), TDengine and OpenTSDB can obtain consistent results (since OpenTSDB performs aggregation and downsampling queries). +2. Interpolation processing in TDengine uses subqueries to assist in completion. As shown above, it is enough to specify the interpolation type in the inner query. Since OpenTSDB uses linear interpolation, use `fill(linear)` to declare the interpolation type in TDengine. Some of the functions mentioned below have exactly the same interpolation calculation requirements. +3. The parameter 20s in Interval indicates that the inner query will generate results according to a time window of 20 seconds. In an actual query, it needs to adjust to the time interval between different records. It ensures that interpolation results are equivalent to the original data. +4. Due to the particular interpolation strategy and mechanism of OpenTSDB i.e. interpolation followed by aggregate calculation, it is impossible for the results to be completely consistent with those of TDengine. But in the case of downsampling (Downsample), TDengine and OpenTSDB can obtain consistent results (since OpenTSDB performs aggregation and downsampling queries). ### Count @@ -261,7 +262,7 @@ Select apercentile(col1, 50, “t-digest”) from table_name Remark: -1. During the approximate query processing, OpenTSDB uses the t-digest algorithm by default, so in order to obtain the same calculation result, the algorithm used needs to be specified in the `apercentile()` function. 
TDengine can support two different approximation processing algorithms, declared by "default" and "t-digest" respectively. +1. When calculating estimate percentiles, OpenTSDB uses the t-digest algorithm by default. In order to obtain the same calculation results in TDengine, the algorithm used needs to be specified in the `apercentile()` function. TDengine can support two different percentile calculation algorithms named "default" and "t-digest" respectively. ### First @@ -379,35 +380,34 @@ We still use the hypothetical environment from Chapter 4. There are three measur ### Storage resource estimation Assuming that the number of sensor devices that generate data and need to be stored is `n`, the frequency of data generation is `t` per second, and the length of each record is `L` bytes, the scale of data generated per day is `n * t * L` bytes. Assuming the compression ratio is `C`, the daily data size is `(n * t * L)/C` bytes. The storage resources are estimated to accommodate the data scale for 1.5 years. In the production environment, the compression ratio C of TDengine is generally between 5 and 7. -With additional 20% ​​redundancy, you can calculate the required storage resources: +With additional 20% redundancy, you can calculate the required storage resources: ```matlab (n * t * L) * (365 * 1.5) * (1+20%)/C ```` - -Combined with the above calculation formula, bring the parameters into the formula, and the raw data scale generated every year is 11.8TB without considering the label information. Note that since tag information is associated with each timeline in TDengine, not every record. The scale of the amount of data to be recorded is somewhat reduced relative to the generated data, and this part of label data can be ignored as a whole. Assuming a compression ratio of 5, the size of the retained data ends up being 2.56 TB. +Substituting in the above formula, the raw data generated every year is 11.8TB without considering the label information. Note that tag information is associated with each timeline in TDengine, not every record. The amount of data to be recorded is somewhat reduced relative to the generated data, and label data can be ignored as a whole. Assuming a compression ratio of 5, the size of the retained data ends up being 2.56 TB. ### Storage Device Selection Considerations -The hard disk should be capable of better random read performance. Considering using an SSD as much as possible is a better choice. A disk with better random read performance is a great help to improve the system's query performance and improve the query response performance as a whole system. To obtain better query performance, the performance index of the single-threaded random read IOPS of the hard disk device should not be lower than 1000, and it is better to reach 5000 IOPS or more. Recommend to use `fio` utility software to evaluate the running performance (please refer to Appendix 1 for specific usage) for the random IO read of the current device to confirm whether it can meet the requirements of random read of large files. +A disk with better random read performance, such as an SSD, improves the system's query performance and improves the query response performance of the whole system. To obtain better query performance, the performance index of the single-threaded random read IOPS of the hard disk device should not be lower than 1000, and it is better to reach 5000 IOPS or more. 
We recommend using the `fio` utility (please refer to Appendix 1 for specific usage) to evaluate the random IO read performance of the current device and confirm whether it can meet the requirements of random reads of large files. Hard disk writing performance has little effect on TDengine. The TDengine writing process adopts the append write mode, so as long as it has good sequential write performance, both SAS hard disks and SSDs in the general sense can well meet TDengine's requirements for disk write performance. ### Computational resource estimates -Due to the particularity of IoT data, after the frequency of data generation is consistent, the writing process of TDengine maintains a relatively fixed amount of resource consumption (computing and storage). According to the [TDengine Operation and Maintenance Guide](/operation/) description, the system consumes less than 1 CPU core at 22,000 writes per second. +Due to the characteristics of IoT data, when the frequency of data generation is consistent, the writing process of TDengine maintains a relatively fixed amount of resource consumption (computing and storage). According to the [TDengine Operation and Maintenance Guide](/operation/) description, the system consumes less than 1 CPU core at 22,000 writes per second. -In estimating the CPU resources consumed by the query, assuming that the application requires the database to provide 10,000 QPS, the CPU time consumed by each query is about 1 ms. The query provided by each core per second is 1,000 QPS, which satisfies 10,000 QPS. The query request requires at least 10 cores. For the system as a whole system to have less than 50% CPU load, the entire cluster needs twice as many as 10 cores or 20 cores. +To estimate the CPU resources consumed by queries, assume that the application requires the database to provide 10,000 QPS and that the CPU time consumed by each query is about 1 ms. Each core can then serve about 1,000 QPS, so at least 10 cores are needed to satisfy 10,000 QPS. For the system as a whole to run at less than 50% CPU load, the entire cluster needs twice as many cores, i.e. 20 cores. ### Memory resource estimation -The database allocates 16MB\*3 buffer memory for each Vnode by default. If the cluster system includes 22 CPU cores, TDengine will create 22 Vnodes (virtual nodes) by default. Each Vnode contains 1000 tables, which can accommodate all the tables. Then it takes about 1.5 hours to write a block, which triggers the drop, and no adjustment is required. A total of 22 Vnodes require about 1GB of memory cache. Considering the memory needed for the query, assuming that the memory overhead of each query is about 50MB, the memory required for 500 queries concurrently is about 25GB. +The database allocates 16MB\*3 buffer memory for each Vnode by default. If the cluster system includes 22 CPU cores, TDengine will create 22 Vnodes (virtual nodes) by default. Each Vnode contains 1000 tables, which is more than enough to accommodate all the tables in our hypothetical scenario. Then it takes about 1.5 hours to write a block, which triggers persistence to disk without requiring any adjustment. A total of 22 Vnodes require about 1GB of memory cache. Considering the memory needed for the query, assuming that the memory overhead of each query is about 50MB, the memory required for 500 queries concurrently is about 25GB. In summary, using a single 16-core 32GB machine or a cluster of 2 8-core 16GB machines is enough.
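The sizing arithmetic in this appendix can be reproduced with a short script. The sketch below simply evaluates the formulas described above; the values of `n`, `t` and `L` are placeholders rather than the numbers from the hypothetical scenario, so only the structure of the calculation is meaningful.

```python
# Storage: daily volume is n * t * L bytes (as defined above), kept for 1.5 years
# with 20% redundancy and divided by the compression ratio C.
n = 10_000   # number of devices (placeholder)
t = 1        # data generation frequency (placeholder)
L = 128      # record length in bytes (placeholder)
C = 5        # assumed compression ratio (typically 5-7 in production)

storage_bytes = (n * t * L) * (365 * 1.5) * (1 + 0.20) / C

# CPU: 10,000 QPS at ~1 ms per query -> 1,000 QPS per core -> 10 cores,
# doubled so the cluster stays below 50% CPU load.
cores = (10_000 / 1_000) * 2

# Memory: 22 vnodes * 16 MB * 3 write buffers, plus 500 concurrent queries
# at roughly 50 MB each.
cache_gb = 22 * 16 * 3 / 1024
query_gb = 500 * 50 / 1024

print(f"storage: {storage_bytes / 1024**4:.3f} TB")
print(f"query cores: {cores:.0f}")
print(f"write cache: {cache_gb:.1f} GB, query memory: {query_gb:.1f} GB")
```

Substituting your actual device count, generation frequency and record length gives the storage figure; the core and memory parts reproduce the 20-core and roughly 1 GB + 25 GB estimates derived above.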
## Appendix 3: Cluster Deployment and Startup -TDengine provides a wealth of help documents to explain many aspects of cluster installation and deployment. Here is the list of corresponding document for your reference. +TDengine provides a wealth of help documents to explain many aspects of cluster installation and deployment. Here is the list of documents for your reference. ### Cluster Deployment @@ -421,7 +421,7 @@ To ensure that the system can obtain the necessary information for regular opera FQDN, firstEp, secondEP, dataDir, logDir, tmpDir, serverPort. For the specific meaning and setting requirements of each parameter, please refer to the document "[TDengine Cluster Installation and Management](/cluster/)" -Follow the same steps to set parameters on the nodes that need running, start the taosd service, and then add Dnodes to the cluster. +Follow the same steps to set parameters on the other nodes, start the taosd service, and then add Dnodes to the cluster. Finally, start `taos` and execute the `show dnodes` command. If you can see all the nodes that have joined the cluster, the cluster building process was successfully completed. For specific operation procedures and precautions, please refer to the document "[TDengine Cluster Installation and Management](/cluster/)". diff --git a/docs-en/27-train-faq/01-faq.md b/docs-en/27-train-faq/01-faq.md index 439775170937ef11fc964914232b2739d688b26f..e182e25b9e98bad11b9c90146400e3720605489e 100644 --- a/docs-en/27-train-faq/01-faq.md +++ b/docs-en/27-train-faq/01-faq.md @@ -5,38 +5,38 @@ title: Frequently Asked Questions ## Submit an Issue -If the tips in FAQ don't help much, please submit an issue on [GitHub](https://github.com/taosdata/TDengine) to describe your problem description, including TDengine version, hardware and OS information, the steps to reproduce the problem, etc. It would be very helpful if you package the contents in `/var/log/taos` and `/etc/taos` and upload. These two are the default directories used by TDengine, if they have been changed in your configuration, please use according to the actual configuration. It's recommended to firstly set `debugFlag` to 135 in `taos.cfg`, restart `taosd`, then reproduce the problem and collect logs. If you don't want to restart, an alternative way of setting `debugFlag` is executing `alter dnode debugFlag 135` command in TDengine CLI `taos`. During normal running, however, please make sure `debugFlag` is set to 131. +If the tips in FAQ don't help much, please submit an issue on [GitHub](https://github.com/taosdata/TDengine) to describe your problem. In your description please include the TDengine version, hardware and OS information, the steps to reproduce the problem and any other relevant information. It would be very helpful if you can package the contents in `/var/log/taos` and `/etc/taos` and upload. These two are the default directories used by TDengine. If you have changed the default directories in your configuration, please package the files in your configured directories. We recommended setting `debugFlag` to 135 in `taos.cfg`, restarting `taosd`, then reproducing the problem and collecting the logs. If you don't want to restart, an alternative way of setting `debugFlag` is executing `alter dnode debugFlag 135` command in TDengine CLI `taos`. During normal running, however, please make sure `debugFlag` is set to 131. ## Frequently Asked Questions ### 1. How to upgrade to TDengine 2.0 from older version? 
-version 2.x is not compatible with version 1.x regarding configuration file and data file, please do following before upgrading: +version 2.x is not compatible with version 1.x. With regard to the configuration and data files, please perform the following steps before upgrading. Please follow data integrity, security, backup and other relevant SOPs, best practices before removing/deleting any data. -1. Delete configuration files: `sudo rm -rf /etc/taos/taos.cfg` +1. Delete configuration files: `sudo rm -rf /etc/taos/taos.cfg` 2. Delete log files: `sudo rm -rf /var/log/taos/` 3. Delete data files if the data doesn't need to be kept: `sudo rm -rf /var/lib/taos/` -4. Install latests 2.x version -5. If the data needs to be kept and migrated to newer version, please contact professional service of TDengine for assistance +4. Install latest 2.x version +5. If the data needs to be kept and migrated to newer version, please contact professional service at TDengine for assistance. ### 2. How to handle "Unable to establish connection"? -When the client is unable to connect to the server, you can try following ways to find out why. +When the client is unable to connect to the server, you can try the following ways to troubleshoot and resolve the problem. 1. Check the network - - Check if the hosts where the client and server are running can be accessible to each other, for example by `ping` command. - - Check if the TCP/UDP on port 6030-6042 are open for access if firewall is enabled. It's better to firstly disable firewall for diagnostics. - - Check if the FQDN and serverPort are configured correctly in `taos.cfg` used by the server side - - Check if the `firstEp` is set properly in the `taos.cfg` used by the client side + - Check if the hosts where the client and server are running are accessible to each other, for example by `ping` command. + - Check if the TCP/UDP on port 6030-6042 are open for access if firewall is enabled. If possible, disable the firewall for diagnostics, but please ensure that you are following security and other relevant protocols. + - Check if the FQDN and serverPort are configured correctly in `taos.cfg` used by the server side. + - Check if the `firstEp` is set properly in the `taos.cfg` used by the client side. 2. Make sure the client version and server version are same. 3. On server side, check the running status of `taosd` by executing `systemctl status taosd` . If your server is started using another way instead of `systemctl`, use the proper method to check whether the server process is running normally. -4. If using connector of Python, Java, Go, Rust, C#, node.JS on Linux to connect toe the server, please make sure `libtaos.so` is in directory `/usr/local/taos/driver` and `/usr/local/taos/driver` is in system lib search environment variable `LD_LIBRARY_PATH`. +4. If using connector of Python, Java, Go, Rust, C#, node.JS on Linux to connect to the server, please make sure `libtaos.so` is in directory `/usr/local/taos/driver` and `/usr/local/taos/driver` is in system lib search environment variable `LD_LIBRARY_PATH`. -5. If using connector on Windows, please make sure `C:\TDengine\driver\taos.dll` is in your system lib search path, it's suggested to put `taos.dll` under `C:\Windows\System32`. +5. If using connector on Windows, please make sure `C:\TDengine\driver\taos.dll` is in your system lib search path. We recommend putting `taos.dll` under `C:\Windows\System32`. 6. 
Some advanced network diagnostics tools @@ -45,7 +45,7 @@ When the client is unable to connect to the server, you can try following ways t Check whether a TCP port on server side is open: `nc -l {port}` Check whether a TCP port on client side is open: `nc {hostIP} {port}` - - On Windows system `Net-TestConnection -ComputerName {fqdn} -Port {port}` on PowerShell can be used to check whether the port on serer side is open for access. + - On Windows system `Net-TestConnection -ComputerName {fqdn} -Port {port}` on PowerShell can be used to check whether the port on server side is open for access. 7. TDengine CLI `taos` can also be used to check network, please refer to [TDengine CLI](/reference/taos-shell). diff --git a/docs-en/27-train-faq/03-docker.md b/docs-en/27-train-faq/03-docker.md index 8f27c35d7945043d39ad83626ceccee941ad135e..afee13c1377b0b4331d6f7ec20251d1aa2db81a1 100644 --- a/docs-en/27-train-faq/03-docker.md +++ b/docs-en/27-train-faq/03-docker.md @@ -3,15 +3,15 @@ sidebar_label: TDengine in Docker title: Deploy TDengine in Docker --- -Even though it's not recommended to deploy TDengine using docker in production system, docker is still very useful in development environment, especially when your host is not Linux. From version 2.0.14.0, the official image of TDengine can support X86-64, X86, arm64, and rm32 . +We do not recommend deploying TDengine using Docker in a production system. However, Docker is still very useful in a development environment, especially when your host is not Linux. From version 2.0.14.0, the official image of TDengine can support X86-64, X86, arm64, and rm32 . -In this chapter a simple step by step guide of using TDengine in docker is introduced. +In this chapter we introduce a simple step by step guide to use TDengine in Docker. ## Install Docker -The installation of docker please refer to [Get Docker](https://docs.docker.com/get-docker/). +To install Docker please refer to [Get Docker](https://docs.docker.com/get-docker/). -After docker is installed, you can check whether Docker is installed properly by displaying Docker version. +After Docker is installed, you can check whether Docker is installed properly by displaying Docker version. ```bash $ docker -v @@ -27,7 +27,7 @@ $ docker run -d -p 6030-6049:6030-6049 -p 6030-6049:6030-6049/udp tdengine/tdeng 526aa188da767ae94b244226a2b2eec2b5f17dd8eff592893d9ec0cd0f3a1ccd ``` -In the above command, a docker container is started to run TDengine server, the port range 6030-6049 of the container is mapped to host port range 6030-6049. If port range 6030-6049 has been occupied on the host, please change to an available host port range. Regarding the requirements about ports on the host, please refer to [Port Configuration](/reference/config/#serverport). +In the above command, a docker container is started to run TDengine server, the port range 6030-6049 of the container is mapped to host port range 6030-6049. If port range 6030-6049 has been occupied on the host, please change to an available host port range. For port requirements on the host, please refer to [Port Configuration](/reference/config/#serverport). 
- **docker run**: Launch a docker container - **-d**: the container will run in background mode @@ -95,7 +95,7 @@ In TDengine CLI, SQL commands can be executed to create/drop databases, tables, ### Access TDengine from host -If `-p` used to map ports properly between host and container, it's also able to access TDengine in container from the host as long as `firstEp` is configured correctly for the client on host. +If option `-p` used to map ports properly between host and container, it's also able to access TDengine in container from the host as long as `firstEp` is configured correctly for the client on host. ``` $ taos @@ -271,7 +271,7 @@ Below is an example output: ### Access TDengine from 3rd party tools -A lot of 3rd party tools can be used to write data into TDengine through `taosAdapter` , for details please refer to [3rd party tools](/third-party/). +A lot of 3rd party tools can be used to write data into TDengine through `taosAdapter`, for details please refer to [3rd party tools](/third-party/). There is nothing different from the 3rd party side to access TDengine server inside a container, as long as the end point is specified correctly, the end point should be the FQDN and the mapped port of the host. diff --git a/packaging/docker/Dockerfile b/packaging/docker/Dockerfile index 26349e257676d99d0ea81e03509c8b09c20a2248..35bea0e65ccc5070fe9d4e82adadc7132ae7cc81 100644 --- a/packaging/docker/Dockerfile +++ b/packaging/docker/Dockerfile @@ -1,32 +1,25 @@ -FROM ubuntu:18.04 - -WORKDIR /root - -ARG pkgFile -ARG dirName -ARG cpuType -RUN echo ${pkgFile} && echo ${dirName} - -COPY ${pkgFile} /root/ -RUN tar -zxf ${pkgFile} -WORKDIR /root/ -RUN cd /root/${dirName}/ && /bin/bash install.sh -e no && cd /root -RUN rm /root/${pkgFile} -RUN rm -rf /root/${dirName} - -ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get clean && apt-get update && apt-get install -y locales tzdata netcat && locale-gen en_US.UTF-8 -ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/lib" \ - LC_CTYPE=en_US.UTF-8 \ - LANG=en_US.UTF-8 \ - LC_ALL=en_US.UTF-8 - -COPY ./bin/* /usr/bin/ - -ENV TINI_VERSION v0.19.0 -RUN bash -c 'echo -e "Downloading tini-${cpuType} ..."' -ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini-${cpuType} /tini -RUN chmod +x /tini -ENTRYPOINT ["/tini", "--", "/usr/bin/entrypoint.sh"] -CMD ["taosd"] -VOLUME [ "/var/lib/taos", "/var/log/taos", "/corefile" ] +FROM ubuntu:18.04 + +WORKDIR /root + +ARG pkgFile +ARG dirName +ARG cpuType +RUN echo ${pkgFile} && echo ${dirName} + +COPY ${pkgFile} /root/ +ENV TINI_VERSION v0.19.0 +ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini-${cpuType} /tini +ENV DEBIAN_FRONTEND=noninteractive +WORKDIR /root/ +RUN tar -zxf ${pkgFile} && cd /root/${dirName}/ && /bin/bash install.sh -e no && cd /root && rm /root/${pkgFile} && rm -rf /root/${dirName} && apt-get update && apt-get install -y locales tzdata netcat && locale-gen en_US.UTF-8 && apt-get clean && rm -rf /var/lib/apt/lists/ && chmod +x /tini + +ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/lib" \ + LC_CTYPE=en_US.UTF-8 \ + LANG=en_US.UTF-8 \ + LC_ALL=en_US.UTF-8 +COPY ./bin/* /usr/bin/ + +ENTRYPOINT ["/tini", "--", "/usr/bin/entrypoint.sh"] +CMD ["taosd"] +VOLUME [ "/var/lib/taos", "/var/log/taos", "/corefile" ] diff --git a/packaging/docker/bin/entrypoint.sh b/packaging/docker/bin/entrypoint.sh index 5fb441004d8b454de1039eb3f4b23eb51f32be64..f4be349c0de0ea0df382fc6fee033120c5c48007 100644 --- a/packaging/docker/bin/entrypoint.sh +++ b/packaging/docker/bin/entrypoint.sh @@ 
-11,39 +11,22 @@ DISABLE_ADAPTER=${TAOS_DISABLE_ADAPTER:-0} unset TAOS_DISABLE_ADAPTER # to get mnodeEpSet from data dir -DATA_DIR=${TAOS_DATA_DIR:-/var/lib/taos} +DATA_DIR=$(taosd -C|grep -E 'dataDir.*(\S+)' -o |head -n1|sed 's/dataDir *//') +DATA_DIR=${DATA_DIR:-/var/lib/taos} -# append env to custom taos.cfg -CFG_DIR=/tmp/taos -CFG_FILE=$CFG_DIR/taos.cfg - -mkdir -p $CFG_DIR >/dev/null 2>&1 - -[ -f /etc/taos/taos.cfg ] && cat /etc/taos/taos.cfg | grep -E -v "^#|^\s*$" >$CFG_FILE -env-to-cfg >>$CFG_FILE - -FQDN=$(cat $CFG_FILE | grep -E -v "^#|^$" | grep fqdn | tail -n1 | sed -E 's/.*fqdn\s+//') +FQDN=$(taosd -C|grep -E 'fqdn.*(\S+)' -o |head -n1|sed 's/fqdn *//') # ensure the fqdn is resolved as localhost grep "$FQDN" /etc/hosts >/dev/null || echo "127.0.0.1 $FQDN" >>/etc/hosts - +FIRSET_EP=$(taosd -C|grep -E 'firstEp.*(\S+)' -o |head -n1|sed 's/firstEp *//') # parse first ep host and port -FIRST_EP_HOST=${TAOS_FIRST_EP%:*} -FIRST_EP_PORT=${TAOS_FIRST_EP#*:} +FIRST_EP_HOST=${FIRSET_EP%:*} +FIRST_EP_PORT=${FIRSET_EP#*:} # in case of custom server port -SERVER_PORT=$(cat $CFG_FILE | grep -E -v "^#|^$" | grep serverPort | tail -n1 | sed -E 's/.*serverPort\s+//') +SERVER_PORT=$(taosd -C|grep -E 'serverPort.*(\S+)' -o |head -n1|sed 's/serverPort *//') SERVER_PORT=${SERVER_PORT:-6030} -# for other binaries like interpreters -if echo $1 | grep -E "taosd$" - >/dev/null; then - true # will run taosd -else - cp -f $CFG_FILE /etc/taos/taos.cfg || true - $@ - exit $? -fi - set +e ulimit -c unlimited # set core files pattern, maybe failed @@ -62,22 +45,23 @@ fi # if has mnode ep set or the host is first ep or not for cluster, just start. if [ -f "$DATA_DIR/dnode/mnodeEpSet.json" ] || [ "$TAOS_FQDN" = "$FIRST_EP_HOST" ]; then - $@ -c $CFG_DIR + $@ # others will first wait the first ep ready. else if [ "$TAOS_FIRST_EP" = "" ]; then echo "run TDengine with single node." - $@ -c $CFG_DIR + $@ exit $? fi while true; do - es=0 - taos -h $FIRST_EP_HOST -P $FIRST_EP_PORT -n startup >/dev/null || es=$? 
- if [ "$es" -eq 0 ]; then + es=$(taos -h $FIRST_EP_HOST -P $FIRST_EP_PORT --check) + echo ${es} + if [ "${es%%:*}" -eq 2 ]; then + echo "execute create dnode" taos -h $FIRST_EP_HOST -P $FIRST_EP_PORT -s "create dnode \"$FQDN:$SERVER_PORT\";" break fi sleep 1s done - $@ -c $CFG_DIR + $@ fi diff --git a/packaging/docker/bin/taos-check b/packaging/docker/bin/taos-check new file mode 100644 index 0000000000000000000000000000000000000000..5dc06b6018b93b627610b446ca6363773fd0fd72 --- /dev/null +++ b/packaging/docker/bin/taos-check @@ -0,0 +1,8 @@ +#!/bin/sh +es=$(taos --check) +code=${es%%:*} +if [ "$code" -ne "0" ] && [ "$code" -ne "4" ]; then + exit 0 +fi +echo $es +exit 1 diff --git a/source/dnode/mnode/impl/test/db/CMakeLists.txt b/source/dnode/mnode/impl/test/db/CMakeLists.txt index 3f6a80835ffa7b2a0a6fcdcff21e1cfd39a02c5f..e28cdd4f61824c04f62513868a9010113140fd31 100644 --- a/source/dnode/mnode/impl/test/db/CMakeLists.txt +++ b/source/dnode/mnode/impl/test/db/CMakeLists.txt @@ -5,7 +5,9 @@ target_link_libraries( PUBLIC sut ) -add_test( - NAME dbTest - COMMAND dbTest -) +if(NOT TD_WINDOWS) + add_test( + NAME dbTest + COMMAND dbTest + ) +endif(NOT TD_WINDOWS) diff --git a/source/dnode/mnode/impl/test/sma/CMakeLists.txt b/source/dnode/mnode/impl/test/sma/CMakeLists.txt index 3f9ec123a80e88371a98fa54c99342726831372d..fd596c5021674bb9d4ec185924129b0fd3bbade8 100644 --- a/source/dnode/mnode/impl/test/sma/CMakeLists.txt +++ b/source/dnode/mnode/impl/test/sma/CMakeLists.txt @@ -5,7 +5,9 @@ target_link_libraries( PUBLIC sut ) -add_test( - NAME smaTest - COMMAND smaTest -) +if(NOT TD_WINDOWS) + add_test( + NAME smaTest + COMMAND smaTest + ) +endif(NOT TD_WINDOWS) diff --git a/source/dnode/mnode/impl/test/stb/CMakeLists.txt b/source/dnode/mnode/impl/test/stb/CMakeLists.txt index d2fe3879979f4f52a215a3d44e25e912be3abb90..857c404c1c299767685fa1572a7f5a0b6463c939 100644 --- a/source/dnode/mnode/impl/test/stb/CMakeLists.txt +++ b/source/dnode/mnode/impl/test/stb/CMakeLists.txt @@ -5,7 +5,9 @@ target_link_libraries( PUBLIC sut ) -add_test( - NAME stbTest - COMMAND stbTest -) +if(NOT TD_WINDOWS) + add_test( + NAME stbTest + COMMAND stbTest + ) +endif(NOT TD_WINDOWS) \ No newline at end of file diff --git a/source/libs/scalar/test/scalar/CMakeLists.txt b/source/libs/scalar/test/scalar/CMakeLists.txt index 672cb5a3de39bfed51c9d399ac3d0431614f50ab..86b936d93ae950e27069835cffcb0e8a99768ac9 100644 --- a/source/libs/scalar/test/scalar/CMakeLists.txt +++ b/source/libs/scalar/test/scalar/CMakeLists.txt @@ -17,9 +17,7 @@ TARGET_INCLUDE_DIRECTORIES( PUBLIC "${TD_SOURCE_DIR}/source/libs/parser/inc" PRIVATE "${TD_SOURCE_DIR}/source/libs/scalar/inc" ) -if(NOT TD_WINDOWS) - add_test( - NAME scalarTest - COMMAND scalarTest - ) -endif(NOT TD_WINDOWS) +add_test( + NAME scalarTest + COMMAND scalarTest +) diff --git a/source/libs/scalar/test/scalar/scalarTests.cpp b/source/libs/scalar/test/scalar/scalarTests.cpp index 3fafc83b18365d490003a792748606c8d4fce804..6a32c6577532c7c7bbb3f07e0f012706dd7163df 100644 --- a/source/libs/scalar/test/scalar/scalarTests.cpp +++ b/source/libs/scalar/test/scalar/scalarTests.cpp @@ -2498,7 +2498,7 @@ TEST(ScalarFunctionTest, tanFunction_column) { code = tanFunction(pInput, 1, pOutput); ASSERT_EQ(code, TSDB_CODE_SUCCESS); for (int32_t i = 0; i < rowNum; ++i) { - ASSERT_EQ(*((double *)colDataGetData(pOutput->columnData, i)), result[i]); + ASSERT_NEAR(*((double *)colDataGetData(pOutput->columnData, i)), result[i], 1e-15); PRINTF("tiny_int after TAN:%f\n", *((double 
*)colDataGetData(pOutput->columnData, i))); } scltDestroyDataBlock(pInput); @@ -2517,7 +2517,7 @@ TEST(ScalarFunctionTest, tanFunction_column) { code = tanFunction(pInput, 1, pOutput); ASSERT_EQ(code, TSDB_CODE_SUCCESS); for (int32_t i = 0; i < rowNum; ++i) { - ASSERT_EQ(*((double *)colDataGetData(pOutput->columnData, i)), result[i]); + ASSERT_NEAR(*((double *)colDataGetData(pOutput->columnData, i)), result[i], 1e-15); PRINTF("float after TAN:%f\n", *((double *)colDataGetData(pOutput->columnData, i))); } diff --git a/tests/system-test/2-query/unique.py b/tests/system-test/2-query/unique.py new file mode 100644 index 0000000000000000000000000000000000000000..227efa6f9ceda24df73830bd46838fd657b67d48 --- /dev/null +++ b/tests/system-test/2-query/unique.py @@ -0,0 +1,457 @@ +from math import floor +from random import randint, random +from numpy import equal +import taos +import sys +import datetime +import inspect + +from util.log import * +from util.sql import * +from util.cases import * + +class TDTestCase: + updatecfgDict = {'debugFlag': 143 ,"cDebugFlag":143,"uDebugFlag":143 ,"rpcDebugFlag":143 , "tmrDebugFlag":143 , + "jniDebugFlag":143 ,"simDebugFlag":143,"dDebugFlag":143, "dDebugFlag":143,"vDebugFlag":143,"mDebugFlag":143,"qDebugFlag":143, + "wDebugFlag":143,"sDebugFlag":143,"tsdbDebugFlag":143,"tqDebugFlag":143 ,"fsDebugFlag":143 ,"fnDebugFlag":143} + + def init(self, conn, logSql): + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor()) + + def prepare_datas(self): + tdSql.execute( + '''create table stb1 + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp) + tags (t1 int) + ''' + ) + + tdSql.execute( + ''' + create table t1 + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp) + ''' + ) + for i in range(4): + tdSql.execute(f'create table ct{i+1} using stb1 tags ( {i+1} )') + + for i in range(9): + tdSql.execute( + f"insert into ct1 values ( now()-{i*10}s, {1*i}, {11111*i}, {111*i}, {11*i}, {1.11*i}, {11.11*i}, {i%2}, 'binary{i}', 'nchar{i}', now()+{1*i}a )" + ) + tdSql.execute( + f"insert into ct4 values ( now()-{i*90}d, {1*i}, {11111*i}, {111*i}, {11*i}, {1.11*i}, {11.11*i}, {i%2}, 'binary{i}', 'nchar{i}', now()+{1*i}a )" + ) + tdSql.execute("insert into ct1 values (now()-45s, 0, 0, 0, 0, 0, 0, 0, 'binary0', 'nchar0', now()+8a )") + tdSql.execute("insert into ct1 values (now()+10s, 9, -99999, -999, -99, -9.99, -99.99, 1, 'binary9', 'nchar9', now()+9a )") + tdSql.execute("insert into ct1 values (now()+15s, 9, -99999, -999, -99, -9.99, NULL, 1, 'binary9', 'nchar9', now()+9a )") + tdSql.execute("insert into ct1 values (now()+20s, 9, -99999, -999, NULL, -9.99, -99.99, 1, 'binary9', 'nchar9', now()+9a )") + + tdSql.execute("insert into ct4 values (now()-810d, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL ) ") + tdSql.execute("insert into ct4 values (now()-400d, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL ) ") + tdSql.execute("insert into ct4 values (now()+90d, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL ) ") + + tdSql.execute( + f'''insert into t1 values + ( '2020-04-21 01:01:01.000', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL ) + ( '2020-10-21 01:01:01.000', 1, 11111, 111, 11, 1.11, 11.11, 1, "binary1", "nchar1", now()+1a ) + ( '2020-12-31 01:01:01.000', 2, 22222, 222, 22, 2.22, 22.22, 0, "binary2", "nchar2", now()+2a ) + ( '2021-01-01 01:01:06.000', 3, 
33333, 333, 33, 3.33, 33.33, 0, "binary3", "nchar3", now()+3a ) + ( '2021-05-07 01:01:10.000', 4, 44444, 444, 44, 4.44, 44.44, 1, "binary4", "nchar4", now()+4a ) + ( '2021-07-21 01:01:01.000', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL ) + ( '2021-09-30 01:01:16.000', 5, 55555, 555, 55, 5.55, 55.55, 0, "binary5", "nchar5", now()+5a ) + ( '2022-02-01 01:01:20.000', 6, 66666, 666, 66, 6.66, 66.66, 1, "binary6", "nchar6", now()+6a ) + ( '2022-10-28 01:01:26.000', 7, 00000, 000, 00, 0.00, 00.00, 1, "binary7", "nchar7", "1970-01-01 08:00:00.000" ) + ( '2022-12-01 01:01:30.000', 8, -88888, -888, -88, -8.88, -88.88, 0, "binary8", "nchar8", "1969-01-01 01:00:00.000" ) + ( '2022-12-31 01:01:36.000', 9, -99999999999999999, -999, -99, -9.99, -999999999999999999999.99, 1, "binary9", "nchar9", "1900-01-01 00:00:00.000" ) + ( '2023-02-21 01:01:01.000', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL ) + ''' + ) + + def test_errors(self): + error_sql_lists = [ + "select unique from t1", + "select unique(123--123)==1 from t1", + "select unique(123,123) from t1", + "select unique(c1,ts) from t1", + "select unique(c1,c1,ts) from t1", + "select unique(c1) as 'd1' from t1", + "select unique(c1 ,c2 ) from t1", + "select unique(c1 ,NULL) from t1", + "select unique(,) from t1;", + "select unique(floor(c1) ab from t1)", + "select unique(c1) as int from t1", + "select unique('c1') from t1", + "select unique(NULL) from t1", + "select unique('') from t1", + "select unique(c%) from t1", + "select unique(t1) from t1", + "select unique(True) from t1", + "select unique(c1) , count(c1) from t1", + "select unique(c1) , avg(c1) from t1", + "select unique(c1) , min(c1) from t1", + "select unique(c1) , spread(c1) from t1", + "select unique(c1) , diff(c1) from t1", + "select unique(c1) , abs(c1) from t1", + "select unique(c1) , c1 from t1", + "select unique from stb1 partition by tbname", + "select unique(123--123)==1 from stb1 partition by tbname", + "select unique(123) from stb1 partition by tbname", + "select unique(c1,ts) from stb1 partition by tbname", + "select unique(c1,c1,ts) from stb1 partition by tbname", + "select unique(c1) as 'd1' from stb1 partition by tbname", + "select unique(c1 ,c2 ) from stb1 partition by tbname", + "select unique(c1 ,NULL) from stb1 partition by tbname", + "select unique(,) from stb1 partition by tbname;", + "select unique(floor(c1) ab from stb1 partition by tbname)", + "select unique(c1) as int from stb1 partition by tbname", + "select unique('c1') from stb1 partition by tbname", + "select unique(NULL) from stb1 partition by tbname", + "select unique('') from stb1 partition by tbname", + "select unique(c%) from stb1 partition by tbname", + #"select unique(t1) from stb1 partition by tbname", + "select unique(True) from stb1 partition by tbname", + "select unique(c1) , count(c1) from stb1 partition by tbname", + "select unique(c1) , avg(c1) from stb1 partition by tbname", + "select unique(c1) , min(c1) from stb1 partition by tbname", + "select unique(c1) , spread(c1) from stb1 partition by tbname", + "select unique(c1) , diff(c1) from stb1 partition by tbname", + "select unique(c1) , abs(c1) from stb1 partition by tbname", + "select unique(c1) , c1 from stb1 partition by tbname" + + ] + for error_sql in error_sql_lists: + tdSql.error(error_sql) + pass + + def support_types(self): + other_no_value_types = [ + "select unique(ts) from t1" , + "select unique(c7) from t1", + "select unique(c8) from t1", + "select unique(c9) from t1", + "select unique(ts) from ct1" , + 
"select unique(c7) from ct1", + "select unique(c8) from ct1", + "select unique(c9) from ct1", + "select unique(ts) from ct3" , + "select unique(c7) from ct3", + "select unique(c8) from ct3", + "select unique(c9) from ct3", + "select unique(ts) from ct4" , + "select unique(c7) from ct4", + "select unique(c8) from ct4", + "select unique(c9) from ct4", + "select unique(ts) from stb1 partition by tbname" , + "select unique(c7) from stb1 partition by tbname", + "select unique(c8) from stb1 partition by tbname", + "select unique(c9) from stb1 partition by tbname" + ] + + for type_sql in other_no_value_types: + tdSql.query(type_sql) + tdLog.info("support type ok , sql is : %s"%type_sql) + + type_sql_lists = [ + "select unique(c1) from t1", + "select unique(c2) from t1", + "select unique(c3) from t1", + "select unique(c4) from t1", + "select unique(c5) from t1", + "select unique(c6) from t1", + + "select unique(c1) from ct1", + "select unique(c2) from ct1", + "select unique(c3) from ct1", + "select unique(c4) from ct1", + "select unique(c5) from ct1", + "select unique(c6) from ct1", + + "select unique(c1) from ct3", + "select unique(c2) from ct3", + "select unique(c3) from ct3", + "select unique(c4) from ct3", + "select unique(c5) from ct3", + "select unique(c6) from ct3", + + "select unique(c1) from stb1 partition by tbname", + "select unique(c2) from stb1 partition by tbname", + "select unique(c3) from stb1 partition by tbname", + "select unique(c4) from stb1 partition by tbname", + "select unique(c5) from stb1 partition by tbname", + "select unique(c6) from stb1 partition by tbname", + + "select unique(c6) as alisb from stb1 partition by tbname", + "select unique(c6) alisb from stb1 partition by tbname", + ] + + for type_sql in type_sql_lists: + tdSql.query(type_sql) + + def check_unique_table(self , unique_sql): + # unique_sql = "select unique(c1) from ct1" + origin_sql = unique_sql.replace("unique(","").replace(")","") + tdSql.query(unique_sql) + unique_result = tdSql.queryResult + + unique_datas = [] + for elem in unique_result: + unique_datas.append(elem[0]) + + + tdSql.query(origin_sql) + origin_result = tdSql.queryResult + origin_datas = [] + for elem in origin_result: + origin_datas.append(elem[0]) + + pre_unique = [] + for elem in origin_datas: + if elem in pre_unique: + continue + else: + pre_unique.append(elem) + + if pre_unique == unique_datas: + tdLog.info(" unique query check pass , unique sql is: %s" %unique_sql) + else: + tdLog.exit(" unique query check fail , unique sql is: %s " %unique_sql) + + def basic_unique_function(self): + + # basic query + tdSql.query("select c1 from ct3") + tdSql.checkRows(0) + tdSql.query("select c1 from t1") + tdSql.checkRows(12) + tdSql.query("select c1 from stb1") + tdSql.checkRows(25) + + # used for empty table , ct3 is empty + tdSql.query("select unique(c1) from ct3") + tdSql.checkRows(0) + tdSql.query("select unique(c2) from ct3") + tdSql.checkRows(0) + tdSql.query("select unique(c3) from ct3") + tdSql.checkRows(0) + tdSql.query("select unique(c4) from ct3") + tdSql.checkRows(0) + tdSql.query("select unique(c5) from ct3") + tdSql.checkRows(0) + tdSql.query("select unique(c6) from ct3") + + # will support _rowts mix with + # tdSql.query("select unique(c6),_rowts from ct3") + + # auto check for t1 table + # used for regular table + tdSql.query("select unique(c1) from t1") + + tdSql.query("desc t1") + col_lists_rows = tdSql.queryResult + col_lists = [] + for col_name in col_lists_rows: + col_lists.append(col_name[0]) + + for col in col_lists: + 
self.check_unique_table(f"select unique({col}) from t1") + + # unique with super tags + + tdSql.query("select unique(c1) from ct1") + tdSql.checkRows(10) + + tdSql.query("select unique(c1) from ct4") + tdSql.checkRows(10) + + tdSql.error("select unique(c1),tbname from ct1") + tdSql.error("select unique(c1),t1 from ct1") + + # unique with common col + tdSql.error("select unique(c1) ,ts from ct1") + tdSql.error("select unique(c1) ,c1 from ct1") + + # unique with scalar function + tdSql.error("select unique(c1) ,abs(c1) from ct1") + tdSql.error("select unique(c1) , unique(c2) from ct1") + tdSql.error("select unique(c1) , abs(c2)+2 from ct1") + + + # unique with aggregate function + tdSql.error("select unique(c1) ,sum(c1) from ct1") + tdSql.error("select unique(c1) ,max(c1) from ct1") + tdSql.error("select unique(c1) ,csum(c1) from ct1") + tdSql.error("select unique(c1) ,count(c1) from ct1") + + # unique with filter where + tdSql.query("select unique(c1) from ct4 where c1 is null") + tdSql.checkData(0, 0, None) + + tdSql.query("select unique(c1) from ct4 where c1 >2 ") + tdSql.checkData(0, 0, 8) + tdSql.checkData(1, 0, 7) + tdSql.checkData(2, 0, 6) + tdSql.checkData(5, 0, 3) + + tdSql.query("select unique(c1) from ct4 where c2 between 0 and 99999") + tdSql.checkData(0, 0, 8) + tdSql.checkData(1, 0, 7) + tdSql.checkData(2, 0, 6) + tdSql.checkData(3, 0, 5) + tdSql.checkData(4, 0, 4) + tdSql.checkData(5, 0, 3) + tdSql.checkData(6, 0, 2) + tdSql.checkData(7, 0, 1) + tdSql.checkData(8, 0, 0) + + # unique with union all + tdSql.query("select unique(c1) from ct4 union all select c1 from ct1") + tdSql.checkRows(23) + tdSql.query("select unique(c1) from ct4 union all select distinct(c1) from ct4") + tdSql.checkRows(20) + tdSql.query("select unique(c2) from ct4 union all select abs(c2)/2 from ct4") + tdSql.checkRows(22) + + # unique with join + # prepare join datas with same ts + + tdSql.execute(" use db ") + tdSql.execute(" create stable st1 (ts timestamp , num int) tags(ind int)") + tdSql.execute(" create table tb1 using st1 tags(1)") + tdSql.execute(" create table tb2 using st1 tags(2)") + + tdSql.execute(" create stable st2 (ts timestamp , num int) tags(ind int)") + tdSql.execute(" create table ttb1 using st2 tags(1)") + tdSql.execute(" create table ttb2 using st2 tags(2)") + + start_ts = 1622369635000 # 2021-05-30 18:13:55 + + for i in range(10): + ts_value = start_ts+i*1000 + tdSql.execute(f" insert into tb1 values({ts_value} , {i})") + tdSql.execute(f" insert into tb2 values({ts_value} , {i})") + + tdSql.execute(f" insert into ttb1 values({ts_value} , {i})") + tdSql.execute(f" insert into ttb2 values({ts_value} , {i})") + + tdSql.query("select unique(tb2.num) from tb1, tb2 where tb1.ts=tb2.ts ") + tdSql.checkRows(10) + tdSql.checkData(0,0,0) + tdSql.checkData(1,0,1) + tdSql.checkData(2,0,2) + tdSql.checkData(9,0,9) + + tdSql.query("select unique(tb2.num) from tb1, tb2 where tb1.ts=tb2.ts union all select unique(tb1.num) from tb1, tb2 where tb1.ts=tb2.ts ") + tdSql.checkRows(20) + tdSql.checkData(0,0,0) + tdSql.checkData(1,0,1) + tdSql.checkData(2,0,2) + tdSql.checkData(9,0,9) + + # nest query + # tdSql.query("select unique(c1) from (select c1 from ct1)") + tdSql.query("select c1 from (select unique(c1) c1 from ct4)") + tdSql.checkRows(10) + tdSql.checkData(0, 0, None) + tdSql.checkData(1, 0, 8) + tdSql.checkData(9, 0, 0) + + tdSql.query("select sum(c1) from (select unique(c1) c1 from ct1)") + tdSql.checkRows(1) + tdSql.checkData(0, 0, 45) + + tdSql.query("select sum(c1) from (select distinct(c1) 
+    def basic_unique_function(self):
+
+        # basic query
+        tdSql.query("select c1 from ct3")
+        tdSql.checkRows(0)
+        tdSql.query("select c1 from t1")
+        tdSql.checkRows(12)
+        tdSql.query("select c1 from stb1")
+        tdSql.checkRows(25)
+
+        # used for empty table , ct3 is empty
+        tdSql.query("select unique(c1) from ct3")
+        tdSql.checkRows(0)
+        tdSql.query("select unique(c2) from ct3")
+        tdSql.checkRows(0)
+        tdSql.query("select unique(c3) from ct3")
+        tdSql.checkRows(0)
+        tdSql.query("select unique(c4) from ct3")
+        tdSql.checkRows(0)
+        tdSql.query("select unique(c5) from ct3")
+        tdSql.checkRows(0)
+        tdSql.query("select unique(c6) from ct3")
+
+        # will support _rowts mix with
+        # tdSql.query("select unique(c6),_rowts from ct3")
+
+        # auto check for t1 table
+        # used for regular table
+        tdSql.query("select unique(c1) from t1")
+
+        tdSql.query("desc t1")
+        col_lists_rows = tdSql.queryResult
+        col_lists = []
+        for col_name in col_lists_rows:
+            col_lists.append(col_name[0])
+
+        for col in col_lists:
+            self.check_unique_table(f"select unique({col}) from t1")
+
+        # unique with super tags
+
+        tdSql.query("select unique(c1) from ct1")
+        tdSql.checkRows(10)
+
+        tdSql.query("select unique(c1) from ct4")
+        tdSql.checkRows(10)
+
+        tdSql.error("select unique(c1),tbname from ct1")
+        tdSql.error("select unique(c1),t1 from ct1")
+
+        # unique with common col
+        tdSql.error("select unique(c1) ,ts from ct1")
+        tdSql.error("select unique(c1) ,c1 from ct1")
+
+        # unique with scalar function
+        tdSql.error("select unique(c1) ,abs(c1) from ct1")
+        tdSql.error("select unique(c1) , unique(c2) from ct1")
+        tdSql.error("select unique(c1) , abs(c2)+2 from ct1")
+
+        # unique with aggregate function
+        tdSql.error("select unique(c1) ,sum(c1) from ct1")
+        tdSql.error("select unique(c1) ,max(c1) from ct1")
+        tdSql.error("select unique(c1) ,csum(c1) from ct1")
+        tdSql.error("select unique(c1) ,count(c1) from ct1")
+
+        # unique with filter where
+        tdSql.query("select unique(c1) from ct4 where c1 is null")
+        tdSql.checkData(0, 0, None)
+
+        tdSql.query("select unique(c1) from ct4 where c1 >2 ")
+        tdSql.checkData(0, 0, 8)
+        tdSql.checkData(1, 0, 7)
+        tdSql.checkData(2, 0, 6)
+        tdSql.checkData(5, 0, 3)
+
+        tdSql.query("select unique(c1) from ct4 where c2 between 0 and 99999")
+        tdSql.checkData(0, 0, 8)
+        tdSql.checkData(1, 0, 7)
+        tdSql.checkData(2, 0, 6)
+        tdSql.checkData(3, 0, 5)
+        tdSql.checkData(4, 0, 4)
+        tdSql.checkData(5, 0, 3)
+        tdSql.checkData(6, 0, 2)
+        tdSql.checkData(7, 0, 1)
+        tdSql.checkData(8, 0, 0)
+
+        # unique with union all
+        tdSql.query("select unique(c1) from ct4 union all select c1 from ct1")
+        tdSql.checkRows(23)
+        tdSql.query("select unique(c1) from ct4 union all select distinct(c1) from ct4")
+        tdSql.checkRows(20)
+        tdSql.query("select unique(c2) from ct4 union all select abs(c2)/2 from ct4")
+        tdSql.checkRows(22)
+
+        # unique with join
+        # prepare join datas with same ts
+
+        tdSql.execute(" use db ")
+        tdSql.execute(" create stable st1 (ts timestamp , num int) tags(ind int)")
+        tdSql.execute(" create table tb1 using st1 tags(1)")
+        tdSql.execute(" create table tb2 using st1 tags(2)")
+
+        tdSql.execute(" create stable st2 (ts timestamp , num int) tags(ind int)")
+        tdSql.execute(" create table ttb1 using st2 tags(1)")
+        tdSql.execute(" create table ttb2 using st2 tags(2)")
+
+        start_ts = 1622369635000  # 2021-05-30 18:13:55
+
+        for i in range(10):
+            ts_value = start_ts + i*1000
+            tdSql.execute(f" insert into tb1 values({ts_value} , {i})")
+            tdSql.execute(f" insert into tb2 values({ts_value} , {i})")
+
+            tdSql.execute(f" insert into ttb1 values({ts_value} , {i})")
+            tdSql.execute(f" insert into ttb2 values({ts_value} , {i})")
+
+        tdSql.query("select unique(tb2.num) from tb1, tb2 where tb1.ts=tb2.ts ")
+        tdSql.checkRows(10)
+        tdSql.checkData(0,0,0)
+        tdSql.checkData(1,0,1)
+        tdSql.checkData(2,0,2)
+        tdSql.checkData(9,0,9)
+
+        tdSql.query("select unique(tb2.num) from tb1, tb2 where tb1.ts=tb2.ts union all select unique(tb1.num) from tb1, tb2 where tb1.ts=tb2.ts ")
+        tdSql.checkRows(20)
+        tdSql.checkData(0,0,0)
+        tdSql.checkData(1,0,1)
+        tdSql.checkData(2,0,2)
+        tdSql.checkData(9,0,9)
+
+        # nest query
+        # tdSql.query("select unique(c1) from (select c1 from ct1)")
+        tdSql.query("select c1 from (select unique(c1) c1 from ct4)")
+        tdSql.checkRows(10)
+        tdSql.checkData(0, 0, None)
+        tdSql.checkData(1, 0, 8)
+        tdSql.checkData(9, 0, 0)
+
+        tdSql.query("select sum(c1) from (select unique(c1) c1 from ct1)")
+        tdSql.checkRows(1)
+        tdSql.checkData(0, 0, 45)
+
+        tdSql.query("select sum(c1) from (select distinct(c1) c1 from ct1) union all select sum(c1) from (select unique(c1) c1 from ct1)")
+        tdSql.checkRows(2)
+        tdSql.checkData(0, 0, 45)
+        tdSql.checkData(1, 0, 45)
+
+        tdSql.query("select 1-abs(c1) from (select unique(c1) c1 from ct4)")
+        tdSql.checkRows(10)
+        tdSql.checkData(0, 0, None)
+        tdSql.checkData(1, 0, -7.000000000)
+
+        # bug for stable
+        # partition by tbname
+        # tdSql.query(" select unique(c1) from stb1 partition by tbname ")
+        # tdSql.checkRows(21)
+
+        # tdSql.query(" select unique(c1) from stb1 partition by tbname ")
+        # tdSql.checkRows(21)
+
+        # group by
+        tdSql.error("select unique(c1) from ct1 group by c1")
+        tdSql.error("select unique(c1) from ct1 group by tbname")
+
+        # super table
+
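+    # boundary check: insert extreme values into a separate database and verify
+    # unique() still returns every distinct row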
+    def check_boundary_values(self):
+
+        tdSql.execute("drop database if exists bound_test")
+        tdSql.execute("create database if not exists bound_test")
+        tdSql.execute("use bound_test")
+        tdSql.execute(
+            "create table stb_bound (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(32), c9 nchar(32), c10 timestamp) tags (t1 int);"
+        )
+        tdSql.execute(f'create table sub1_bound using stb_bound tags ( 1 )')
+        tdSql.execute(
+            f"insert into sub1_bound values ( now()-1s, 2147483647, 9223372036854775807, 32767, 127, 3.40E+38, 1.7e+308, True, 'binary_tb1', 'nchar_tb1', now() )"
+        )
+        tdSql.execute(
+            f"insert into sub1_bound values ( now(), 2147483646, 9223372036854775806, 32766, 126, 3.40E+38, 1.7e+308, True, 'binary_tb1', 'nchar_tb1', now() )"
+        )
+
+        tdSql.execute(
+            f"insert into sub1_bound values ( now(), -2147483646, -9223372036854775806, -32766, -126, -3.40E+38, -1.7e+308, True, 'binary_tb1', 'nchar_tb1', now() )"
+        )
+
+        tdSql.execute(
+            f"insert into sub1_bound values ( now(), 2147483643, 9223372036854775803, 32763, 123, 3.39E+38, 1.69e+308, True, 'binary_tb1', 'nchar_tb1', now() )"
+        )
+
+        tdSql.execute(
+            f"insert into sub1_bound values ( now(), -2147483643, -9223372036854775803, -32763, -123, -3.39E+38, -1.69e+308, True, 'binary_tb1', 'nchar_tb1', now() )"
+        )
+
+        tdSql.error(
+            f"insert into sub1_bound values ( now()+1s, 2147483648, 9223372036854775808, 32768, 128, 3.40E+38, 1.7e+308, True, 'binary_tb1', 'nchar_tb1', now() )"
+        )
+
+        tdSql.query("select unique(c2) from sub1_bound")
+        tdSql.checkRows(5)
+        tdSql.checkData(0, 0, 9223372036854775807)
+
+    def run(self):  # sourcery skip: extract-duplicate-method, remove-redundant-fstring
+        tdSql.prepare()
+
+        tdLog.printNoPrefix("==========step1:create table ==============")
+
+        self.prepare_datas()
+
+        tdLog.printNoPrefix("==========step2:test errors ==============")
+
+        self.test_errors()
+
+        tdLog.printNoPrefix("==========step3:support types ============")
+
+        self.support_types()
+
+        tdLog.printNoPrefix("==========step4: unique basic query ============")
+
+        self.basic_unique_function()
+
+        tdLog.printNoPrefix("==========step5: unique boundary query ============")
+
+        self.check_boundary_values()
+
+    def stop(self):
+        tdSql.close()
+        tdLog.success(f"{__file__} successfully executed")
+
+tdCases.addLinux(__file__, TDTestCase())
+tdCases.addWindows(__file__, TDTestCase())
diff --git a/tests/system-test/fulltest.sh b/tests/system-test/fulltest.sh
index 5708a7adeb253d84a3b170cdc83c705d1a821e87..7b4908eba21ce1e20bc263cf5675c0fd748ece5c 100644
--- a/tests/system-test/fulltest.sh
+++ b/tests/system-test/fulltest.sh
@@ -80,6 +80,7 @@ python3 ./test.py -f 2-query/mavg.py
 python3 ./test.py -f 2-query/diff.py
 python3 ./test.py -f 2-query/sample.py
 python3 ./test.py -f 2-query/function_diff.py
+python3 ./test.py -f 2-query/unique.py

 python3 ./test.py -f 7-tmq/basic5.py
 python3 ./test.py -f 7-tmq/subscribeDb.py
@@ -91,4 +92,3 @@ python3 ./test.py -f 7-tmq/subscribeStb1.py
 python3 ./test.py -f 7-tmq/subscribeStb2.py
 python3 ./test.py -f 7-tmq/subscribeStb3.py
 python3 ./test.py -f 7-tmq/subscribeStb4.py
-python3 ./test.py -f 7-tmq/subscribeStb2.py
\ No newline at end of file