From 46fe7fc81f73f0d40a40670673de6bbb8dc17635 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Tue, 15 Mar 2022 14:46:44 +0800 Subject: [PATCH] add docs for new function mode/tail/unique/stateCount/stateDuration/hyperloglog --- documentation20/cn/12.taos-sql/docs.md | 241 +++++++++++++++++++++++++ documentation20/en/12.taos-sql/docs.md | 231 ++++++++++++++++++++++++ 2 files changed, 472 insertions(+) diff --git a/documentation20/cn/12.taos-sql/docs.md b/documentation20/cn/12.taos-sql/docs.md index 025065fe9e..cf0e011214 100755 --- a/documentation20/cn/12.taos-sql/docs.md +++ b/documentation20/cn/12.taos-sql/docs.md @@ -1083,7 +1083,71 @@ TDengine支持针对数据的聚合查询。提供支持的聚合和选择函数 {slop:1.000000, intercept:9.733334} | Query OK, 1 row(s) in set (0.000921s) ``` + +- **MODE** + ```mysql + SELECT MODE(field_name) FROM tb_name [WHERE clause]; + ``` + 功能说明:返回出现频率最高的值,若存在多个频率相同的最高值,输出空。不能匹配标签、时间戳输出。 + + 返回结果数据类型:同应用的字段。 + + 应用字段:适合于除时间主列外的任何类型字段。 + 适用于:**表**。 + + 示例: + ```mysql + taos> select voltage from d002; + voltage | + ======================== + 1 | + 1 | + 2 | + 19 | + Query OK, 4 row(s) in set (0.003545s) + + taos> select mode(voltage) from d002; + mode(voltage) | + ======================== + 1 | + Query OK, 1 row(s) in set (0.019393s) + ``` + +- **HYPERLOGLOG** + ```mysql + SELECT HYPERLOGLOG(field_name) FROM { tb_name | stb_name } [WHERE clause]; + ``` + 功能说明:采用hyperloglog算法,返回某列的基数。该算法在数据量很大的情况下,可以明显降低内存的占用,但是求出来的基数是个估算值,标准误差为0.81%。 + + 返回结果类型:整形。 + + 适用数据类型:适合于任何类型字段。 + + 支持的版本:2.6.0.x 之后的版本。 + + 示例: + ```mysql + taos> select dbig from shll; + dbig | + ======================== + 1 | + 1 | + 1 | + NULL | + 2 | + 19 | + NULL | + 9 | + Query OK, 8 row(s) in set (0.003755s) + + taos> select hyperloglog(dbig) from shll; + hyperloglog(dbig)| + ======================== + 4 | + Query OK, 1 row(s) in set (0.008388s) + ``` + ### 选择函数 在使用所有的选择函数的时候,可以同时指定输出 ts 列或标签列(包括 tbname),这样就可以方便地知道被选出的值是源于哪个数据行的。 @@ -1482,6 +1546,84 @@ TDengine支持针对数据的聚合查询。提供支持的聚合和选择函数 Query OK, 2 row(s) in set 
(0.003487s) ``` +- **TAIL** + ```mysql + SELECT TAIL(field_name, k, offset_val) FROM {tb_name | stb_name} [WHERE clause]; + ``` + 功能说明:跳过最后 offset_val 个记录,然后返回连续的 k 个记录,不忽略 NULL 值。offset_val 可以不输入,此时返回最后的 k 个记录。当有 offset_val 输入的情况下,该函数功能等效于 order by ts desc LIMIT k OFFSET offset_val。 + + 参数范围:k: [1,100] offset_val: [0,100]。 + + 返回结果数据类型:同应用的字段。 + + 应用字段:适合于除时间主列外的任何类型字段。 + + 适用于:**表、超级表**。 + + 支持版本:2.6.0.x 之后的版本。 + + 示例: + ```mysql + taos> select ts,dbig from tail2; + ts | dbig | + ================================================== + 2021-10-15 00:31:33.000 | 1 | + 2021-10-17 00:31:31.000 | NULL | + 2021-12-24 00:31:34.000 | 2 | + 2022-01-01 08:00:05.000 | 19 | + 2022-01-01 08:00:06.000 | NULL | + 2022-01-01 08:00:07.000 | 9 | + Query OK, 6 row(s) in set (0.001952s) + + taos> select tail(dbig,2,2) from tail2; + ts | tail(dbig,2,2) | + ================================================== + 2021-12-24 00:31:34.000 | 2 | + 2022-01-01 08:00:05.000 | 19 | + Query OK, 2 row(s) in set (0.002307s) + +- **UNIQUE** + ```mysql + SELECT UNIQUE(field_name) FROM {tb_name | stb_name} [WHERE clause]; + ``` + 功能说明:返回该列的数值首次出现的值。该函数功能与 distinct 相似,但是可以匹配标签和时间戳信息。可以针对除时间列以外的字段进行查询,可以匹配标签和时间戳,其中的标签和时间戳是第一次出现时刻的标签和时间戳。 + + 返回结果数据类型:同应用的字段。 + + 应用字段:适合于除时间类型以外的字段。 + + 适用于:**表、超级表**。 + + 支持版本:2.6.0.x 之后的版本。 + + 说明:该函数可以应用在普通表和超级表上。不能和窗口操作一起使用,例如 interval/state_window/session_window 。 + + 示例: + ```mysql + taos> select ts,voltage from unique1; + ts | voltage | + ================================================== + 2021-10-17 00:31:31.000 | 1 | + 2022-01-24 00:31:31.000 | 1 | + 2021-10-17 00:31:31.000 | 1 | + 2021-12-24 00:31:31.000 | 2 | + 2022-01-01 08:00:01.000 | 19 | + 2021-10-17 00:31:31.000 | NULL | + 2022-01-01 08:00:02.000 | NULL | + 2022-01-01 08:00:03.000 | 9 | + Query OK, 8 row(s) in set (0.003018s) + + taos> select unique(voltage) from unique1; + ts | unique(voltage) | + ================================================== + 2021-10-17 00:31:31.000 | 1 | + 2021-10-17 00:31:31.000 | 
NULL | + 2021-12-24 00:31:31.000 | 2 | + 2022-01-01 08:00:01.000 | 19 | + 2022-01-01 08:00:03.000 | 9 | + Query OK, 5 row(s) in set (0.108458s) + + ### 计算函数 - **DIFF** @@ -1623,6 +1765,105 @@ TDengine支持针对数据的聚合查询。提供支持的聚合和选择函数 80.810000718 | Query OK, 3 row(s) in set (0.001046s) ``` + +- **STATECOUNT** + ```mysql + SELECT STATECOUNT(field_name, oper, val) FROM { tb_name | stb_name } [WHERE clause]; + ``` + 功能说明:返回满足某个条件的连续记录的个数,结果作为新的一列追加在每行后面。条件根据参数计算,如果条件为true则加1,条件为false则重置为-1,如果数据为NULL,跳过该条数据。 + + 参数范围: + - oper : LT(小于)、GT(大于)、LE(小于等于)、GE(大于等于)、NE(不等于)、EQ(等于),不区分大小写。 + - val : 数值型 + + 返回结果类型:整型。 + + 适用数据类型:不能应用在 timestamp、binary、nchar、bool 类型字段上。 + + 嵌套子查询支持:不支持应用在子查询上。 + + 支持的版本:2.6.0.x 之后的版本。 + + 说明: + + - 该函数可以应用在普通表上,在由 GROUP BY 划分出单独时间线的情况下用于超级表(也即 GROUP BY tbname) + + - 不能和窗口操作一起使用,例如 interval/state_window/session_window。 + + 示例: + ```mysql + taos> select ts,dbig from statef2; + ts | dbig | + ======================================================== + 2021-10-15 00:31:33.000000000 | 1 | + 2021-10-17 00:31:31.000000000 | NULL | + 2021-12-24 00:31:34.000000000 | 2 | + 2022-01-01 08:00:05.000000000 | 19 | + 2022-01-01 08:00:06.000000000 | NULL | + 2022-01-01 08:00:07.000000000 | 9 | + Query OK, 6 row(s) in set (0.002977s) + + taos> select stateCount(dbig,GT,2) from statef2; + ts | dbig | statecount(dbig,gt,2) | + ================================================================================ + 2021-10-15 00:31:33.000000000 | 1 | -1 | + 2021-10-17 00:31:31.000000000 | NULL | NULL | + 2021-12-24 00:31:34.000000000 | 2 | -1 | + 2022-01-01 08:00:05.000000000 | 19 | 1 | + 2022-01-01 08:00:06.000000000 | NULL | NULL | + 2022-01-01 08:00:07.000000000 | 9 | 2 | + Query OK, 6 row(s) in set (0.002791s) + ``` + +- **STATEDURATION** + ```mysql + SELECT stateDuration(field_name, oper, val, unit) FROM { tb_name | stb_name } [WHERE clause]; + ``` + 功能说明:返回满足某个条件的连续记录的时间长度,结果作为新的一列追加在每行后面。条件根据参数计算,如果条件为true则加上两个记录之间的时间长度(第一个满足条件的记录时间长度记为0),条件为false则重置为-1,如果数据为NULL,跳过该条数据。 
+ 参数范围: + - oper : LT(小于)、GT(大于)、LE(小于等于)、GE(大于等于)、NE(不等于)、EQ(等于),不区分大小写。 + - val : 数值型 + - unit : 时间长度的单位,范围[1s、1m、1h],不足一个单位舍去。默认为1s。 + + 返回结果类型:整型。 + + 适用数据类型:不能应用在 timestamp、binary、nchar、bool 类型字段上。 + + 嵌套子查询支持:不支持应用在子查询上。 + + 支持的版本:2.6.0.x 之后的版本。 + + 说明: + + - 该函数可以应用在普通表上,在由 GROUP BY 划分出单独时间线的情况下用于超级表(也即 GROUP BY tbname) + + - 不能和窗口操作一起使用,例如 interval/state_window/session_window。 + + 示例: + ```mysql + taos> select ts,dbig from statef2; + ts | dbig | + ======================================================== + 2021-10-15 00:31:33.000000000 | 1 | + 2021-10-17 00:31:31.000000000 | NULL | + 2021-12-24 00:31:34.000000000 | 2 | + 2022-01-01 08:00:05.000000000 | 19 | + 2022-01-01 08:00:06.000000000 | NULL | + 2022-01-01 08:00:07.000000000 | 9 | + Query OK, 6 row(s) in set (0.002407s) + + taos> select stateDuration(dbig,GT,2) from statef2; + ts | dbig | stateduration(dbig,gt,2) | + =================================================================================== + 2021-10-15 00:31:33.000000000 | 1 | -1 | + 2021-10-17 00:31:31.000000000 | NULL | NULL | + 2021-12-24 00:31:34.000000000 | 2 | -1 | + 2022-01-01 08:00:05.000000000 | 19 | 0 | + 2022-01-01 08:00:06.000000000 | NULL | NULL | + 2022-01-01 08:00:07.000000000 | 9 | 2 | + Query OK, 6 row(s) in set (0.002613s) + ``` ## 按窗口切分聚合 diff --git a/documentation20/en/12.taos-sql/docs.md b/documentation20/en/12.taos-sql/docs.md index cbd2d77a50..9a68e07178 100755 --- a/documentation20/en/12.taos-sql/docs.md +++ b/documentation20/en/12.taos-sql/docs.md @@ -852,6 +852,69 @@ TDengine supports aggregations over data, they are listed below: Query OK, 1 row(s) in set (0.000921s) ``` +- **MODE** + ```mysql + SELECT MODE(field_name) FROM tb_name [WHERE clause]; + ``` + Function: Returns the value with the highest frequency. If several values tie for the highest frequency, the output is NULL. + + Return Data Type: Same as applicable fields. + + Applicable Fields: All types except timestamp. 
+ + Supported version: Versions after 2.6.0. + + Example: + ```mysql + taos> select voltage from d002; + voltage | + ======================== + 1 | + 1 | + 2 | + 19 | + Query OK, 4 row(s) in set (0.003545s) + + taos> select mode(voltage) from d002; + mode(voltage) | + ======================== + 1 | + Query OK, 1 row(s) in set (0.019393s) + ``` + +- **HYPERLOGLOG** + ```mysql + SELECT HYPERLOGLOG(field_name) FROM { tb_name | stb_name } [WHERE clause]; + ``` + Function: Returns the cardinality of a column using the hyperloglog algorithm. With a large amount of data the algorithm significantly reduces memory usage, but the returned cardinality is an estimate with a standard error of 0.81%. + + Return Data Type: Integer. + + Applicable Fields: All types. + + Supported version: Versions after 2.6.0. + + Example: + ```mysql + taos> select dbig from shll; + dbig | + ======================== + 1 | + 1 | + 1 | + NULL | + 2 | + 19 | + NULL | + 9 | + Query OK, 8 row(s) in set (0.003755s) + + taos> select hyperloglog(dbig) from shll; + hyperloglog(dbig)| + ======================== + 4 | + Query OK, 1 row(s) in set (0.008388s) + ### Selector Functions - **MIN** @@ -1102,6 +1165,83 @@ TDengine supports aggregations over data, they are listed below: Query OK, 1 row(s) in set (0.001042s) ``` +- **TAIL** + ```mysql + SELECT TAIL(field_name, k, offset_val) FROM {tb_name | stb_name} [WHERE clause]; + ``` + Function: Skips the last offset_val records, then returns the k consecutive records before them; NULL values are not ignored. offset_val can be omitted, in which case the last k records are returned. When offset_val is given, the function is equivalent to: order by ts desc LIMIT k OFFSET offset_val. + + Range: k: [1,100] offset_val: [0,100]. + + Return Data Type: Same as applicable fields. + + Applicable Fields: All types except timestamp. + + Applied to: **table, STable**. + + Supported version: Versions after 2.6.0. 
+ + Example: + ```mysql + taos> select ts,dbig from tail2; + ts | dbig | + ================================================== + 2021-10-15 00:31:33.000 | 1 | + 2021-10-17 00:31:31.000 | NULL | + 2021-12-24 00:31:34.000 | 2 | + 2022-01-01 08:00:05.000 | 19 | + 2022-01-01 08:00:06.000 | NULL | + 2022-01-01 08:00:07.000 | 9 | + Query OK, 6 row(s) in set (0.001952s) + + taos> select tail(dbig,2,2) from tail2; + ts | tail(dbig,2,2) | + ================================================== + 2021-12-24 00:31:34.000 | 2 | + 2022-01-01 08:00:05.000 | 19 | + Query OK, 2 row(s) in set (0.002307s) + +- **UNIQUE** + ```mysql + SELECT UNIQUE(field_name) FROM {tb_name | stb_name} [WHERE clause]; + ``` + Function: Returns the first occurrence of each value in this column. It is similar to distinct, but the result can be matched with tag and timestamp information, which are the tag and timestamp of the row where the value first appears. + + Return Data Type: Same as applicable fields. + + Applicable Fields: All types except timestamp. + + Applied to: **table, STable**. + + Supported version: Versions after 2.6.0. + + Note: This function can be applied to ordinary tables and super tables. Cannot be used with window operations, such as interval/state_window/session_window. + + Example: + ```mysql + taos> select ts,voltage from unique1; + ts | voltage | + ================================================== + 2021-10-17 00:31:31.000 | 1 | + 2022-01-24 00:31:31.000 | 1 | + 2021-10-17 00:31:31.000 | 1 | + 2021-12-24 00:31:31.000 | 2 | + 2022-01-01 08:00:01.000 | 19 | + 2021-10-17 00:31:31.000 | NULL | + 2022-01-01 08:00:02.000 | NULL | + 2022-01-01 08:00:03.000 | 9 | + Query OK, 8 row(s) in set (0.003018s) + + taos> select unique(voltage) from unique1; + ts | unique(voltage) | + ================================================== + 2021-10-17 00:31:31.000 | 1 | + 2021-10-17 00:31:31.000 | NULL | + 2021-12-24 00:31:31.000 | 2 | + 2022-01-01 08:00:01.000 | 19 | + 2022-01-01 08:00:03.000 | 9 | + Query OK, 5 row(s) in set (0.108458s) + ### Computing Functions - **DIFF** @@ -1172,6 +1312,97 @@ TDengine supports aggregations over data, they are listed below: 1. 
Calculation between two or more columns is supported, and the calculation priorities can be controlled by parentheses(); 2. The NULL field does not participate in the calculation. If a row involved in calculation contains NULL, the calculation result of the row is NULL. +- **STATECOUNT** + ```mysql + SELECT STATECOUNT(field_name, oper, val) FROM { tb_name | stb_name } [WHERE clause]; + ``` + Function: Returns the number of consecutive records that meet a certain condition, and the result is appended to each row as a new column. The condition is calculated according to the parameters. If the condition is true, the count is increased by 1. If the condition is false, the count is reset to -1. If the data is NULL, the record is skipped. + + Range: + - oper : LT(<), GT(>), LE(<=), GE(>=), NE(!=), EQ(=), case insensitive. + - val : Number. + + Returned Data Type: Integer. + + Applicable Fields: All types except timestamp, binary, nchar, bool. + + Supported version: Versions after 2.6.0. + + Note: + - This function can be applied to ordinary tables. It can also be applied to super tables when GROUP BY divides the data into separate timelines (i.e. group by TBNAME). + - Cannot be used with window operations, such as interval/state_window/session_window. 
+ + Example: + ```mysql + taos> select ts,dbig from statef2; + ts | dbig | + ======================================================== + 2021-10-15 00:31:33.000000000 | 1 | + 2021-10-17 00:31:31.000000000 | NULL | + 2021-12-24 00:31:34.000000000 | 2 | + 2022-01-01 08:00:05.000000000 | 19 | + 2022-01-01 08:00:06.000000000 | NULL | + 2022-01-01 08:00:07.000000000 | 9 | + Query OK, 6 row(s) in set (0.002977s) + + taos> select stateCount(dbig,GT,2) from statef2; + ts | dbig | statecount(dbig,gt,2) | + ================================================================================ + 2021-10-15 00:31:33.000000000 | 1 | -1 | + 2021-10-17 00:31:31.000000000 | NULL | NULL | + 2021-12-24 00:31:34.000000000 | 2 | -1 | + 2022-01-01 08:00:05.000000000 | 19 | 1 | + 2022-01-01 08:00:06.000000000 | NULL | NULL | + 2022-01-01 08:00:07.000000000 | 9 | 2 | + Query OK, 6 row(s) in set (0.002791s) + ``` + +- **STATEDURATION** + ```mysql + SELECT stateDuration(field_name, oper, val, unit) FROM { tb_name | stb_name } [WHERE clause]; + ``` + Function: Returns the length of time of consecutive records that meet a certain condition, and the result is appended to each row as a new column. The condition is calculated according to the parameters. If the condition is true, the length of time between the two records is added (the length of time of the first record that meets the condition is recorded as 0). If the condition is false, the duration is reset to -1. If the data is NULL, the record is skipped. + + Range: + - oper : LT(<), GT(>), LE(<=), GE(>=), NE(!=), EQ(=), case insensitive. + - val : Number. + - unit : Unit of time length, range [1s, 1m, 1h]; any remainder smaller than one unit is discarded. The default is 1s. + + Returned Data Type: Integer. + + Applicable Fields: All types except timestamp, binary, nchar, bool. + + Supported version: Versions after 2.6.0. + + Note: + - This function can be applied to ordinary tables. It can also be applied to super tables when GROUP BY divides the data into separate timelines (i.e. 
group by TBNAME). + - Cannot be used with window operations, such as interval/state_window/session_window. + + Example: + ```mysql + taos> select ts,dbig from statef2; + ts | dbig | + ======================================================== + 2021-10-15 00:31:33.000000000 | 1 | + 2021-10-17 00:31:31.000000000 | NULL | + 2021-12-24 00:31:34.000000000 | 2 | + 2022-01-01 08:00:05.000000000 | 19 | + 2022-01-01 08:00:06.000000000 | NULL | + 2022-01-01 08:00:07.000000000 | 9 | + Query OK, 6 row(s) in set (0.002407s) + + taos> select stateDuration(dbig,GT,2) from statef2; + ts | dbig | stateduration(dbig,gt,2) | + =================================================================================== + 2021-10-15 00:31:33.000000000 | 1 | -1 | + 2021-10-17 00:31:31.000000000 | NULL | NULL | + 2021-12-24 00:31:34.000000000 | 2 | -1 | + 2022-01-01 08:00:05.000000000 | 19 | 0 | + 2022-01-01 08:00:06.000000000 | NULL | NULL | + 2022-01-01 08:00:07.000000000 | 9 | 2 | + Query OK, 6 row(s) in set (0.002613s) + ``` + ## Time-dimension Aggregation TDengine supports aggregating by intervals (time range). Data in a table can partitioned by intervals and aggregated to generate results. For example, a temperature sensor collects data once per second, but the average temperature needs to be queried every 10 minutes. This aggregation is suitable for down sample operation, and the syntax is as follows: -- GitLab