diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000000000000000000000000000000000000..6f98693addd5cba9a40f6ab9335054951a78b2ee --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,13 @@ +# Use the latest 2.1 version of CircleCI pipeline process engine. See: https://circleci.com/docs/2.0/configuration-reference +version: 2.1 +# Use a package of configuration called an orb. +orbs: + # Declare a dependency on the welcome-orb + welcome: circleci/welcome-orb@0.4.1 +# Orchestrate or schedule a set of jobs +workflows: + # Name the workflow "welcome" + welcome: + # Run the welcome/run job in its own container + jobs: + - welcome/run diff --git a/.gitignore b/.gitignore index 1ff11080569e9312369f6e9c00463e25853fd38b..50f4251320abc80358b67eab22c02672d5f26bd6 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ build/ .vscode/ .idea/ cmake-build-debug/ +cmake-build-release/ cscope.out .DS_Store debug/ @@ -67,6 +68,8 @@ CMakeError.log *.o version.c taos.rc +src/connector/jdbc/.classpath +src/connector/jdbc/.project src/connector/jdbc/.settings/ tests/comparisonTest/cassandra/cassandratest/.classpath tests/comparisonTest/cassandra/cassandratest/.project diff --git a/.gitmodules b/.gitmodules index 346f5c00699e51eac39dbfaffdbf96656052b024..a2266c46afd180b52d3aa19003380078894f6a4b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -13,3 +13,6 @@ [submodule "deps/jemalloc"] path = deps/jemalloc url = https://github.com/jemalloc/jemalloc +[submodule "deps/TSZ"] + path = deps/TSZ + url = https://github.com/taosdata/TSZ.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 1fc8d4f1b473311046f3d195fc78b9fc37344f3a..093731f190a380539cca3db8f8c12793d4b6557c 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ IF (CMAKE_VERSION VERSION_LESS 3.0) PROJECT(TDengine CXX) SET(PROJECT_VERSION_MAJOR "${LIB_MAJOR_VERSION}") SET(PROJECT_VERSION_MINOR "${LIB_MINOR_VERSION}") - SET(PROJECT_VERSION_PATCH"${LIB_PATCH_VERSION}") + SET(PROJECT_VERSION_PATCH "${LIB_PATCH_VERSION}") SET(PROJECT_VERSION "${LIB_VERSION_STRING}") ELSE () CMAKE_POLICY(SET CMP0048 NEW) diff --git a/Jenkinsfile b/Jenkinsfile index 534777708dc14e76689bda41d76059189bdc271a..e6e8a1df322ab4d529c1f7d44664717d278ba194 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -95,7 +95,7 @@ def pre_test(){ make > /dev/null make install > /dev/null cd ${WKC}/tests - pip3 install ${WKC}/src/connector/python/ + pip3 install ${WKC}/src/connector/python ''' return 1 } diff --git a/README-CN.md b/README-CN.md index d4c10e71d684ab5d21c1c767c398707956946232..a9bc814e8d6f6bef0ad94e29588f62e2e4c0e7f1 100644 --- a/README-CN.md +++ b/README-CN.md @@ -23,7 +23,7 @@ TDengine是涛思数据专为物联网、车联网、工业互联网、IT运维 TDengine是一个高效的存储、查询、分析时序大数据的平台,专为物联网、车联网、工业互联网、运维监测等优化而设计。您可以像使用关系型数据库MySQL一样来使用它,但建议您在使用前仔细阅读一遍下面的文档,特别是 [数据模型](https://www.taosdata.com/cn/documentation/architecture) 与 [数据建模](https://www.taosdata.com/cn/documentation/model)。除本文档之外,欢迎 [下载产品白皮书](https://www.taosdata.com/downloads/TDengine%20White%20Paper.pdf)。 -# 生成 +# 构建 TDengine目前2.0版服务器仅能在Linux系统上安装和运行,后续会支持Windows、macOS等系统。客户端可以在Windows或Linux上安装和运行。任何OS的应用也可以选择RESTful接口连接服务器taosd。CPU支持X64/ARM64/MIPS64/Alpha64,后续会支持ARM32、RISC-V等CPU架构。用户可根据需求选择通过[源码](https://www.taosdata.com/cn/getting-started/#通过源码安装)或者[安装包](https://www.taosdata.com/cn/getting-started/#通过安装包安装)来安装。本快速指南仅适用于通过源码安装。 @@ -107,7 +107,7 @@ Go 连接器和 Grafana 插件在其他独立仓库,如果安装它们的话 git submodule update --init --recursive ``` -## 生成 TDengine +## 构建 TDengine ### Linux 系统 @@ -116,7 +116,13 @@ mkdir debug && cd debug cmake .. 
&& cmake --build . ``` -在X86-64、X86、arm64 和 arm32 平台上,TDengine 生成脚本可以自动检测机器架构。也可以手动配置 CPUTYPE 参数来指定 CPU 类型,如 aarch64 或 aarch32 等。 +您可以选择使用 Jemalloc 作为内存分配器,替代默认的 glibc: +```bash +apt install autoconf +cmake .. -DJEMALLOC_ENABLED=true +``` + +在X86-64、X86、arm64、arm32 和 mips64 平台上,TDengine 生成脚本可以自动检测机器架构。也可以手动配置 CPUTYPE 参数来指定 CPU 类型,如 aarch64 或 aarch32 等。 aarch64: @@ -130,6 +136,12 @@ aarch32: cmake .. -DCPUTYPE=aarch32 && cmake --build . ``` +mips64: + +```bash +cmake .. -DCPUTYPE=mips64 && cmake --build . +``` + ### Windows 系统 如果你使用的是 Visual Studio 2013 版本: @@ -173,9 +185,10 @@ cmake .. && cmake --build . # 安装 -如果你不想安装,可以直接在shell中运行。生成完成后,安装 TDengine: +生成完成后,安装 TDengine(下文给出的指令以 Linux 为例,如果是在 Windows 下,那么对应的指令会是 `nmake install`): + ```bash -make install +sudo make install ``` 用户可以在[文件目录结构](https://www.taosdata.com/cn/documentation/administrator#directories)中了解更多在操作系统中生成的目录或文件。 @@ -183,7 +196,7 @@ make install 安装成功后,在终端中启动 TDengine 服务: ```bash -taosd +sudo systemctl start taosd ``` 用户可以使用 TDengine Shell 来连接 TDengine 服务,在终端中,输入: @@ -196,7 +209,7 @@ taos ## 快速运行 -TDengine 生成后,在终端执行以下命令: +如果不希望以服务方式运行 TDengine,也可以在终端中直接运行它。也即在生成完成后,执行以下命令(在 Windows 下,生成的可执行文件会带有 .exe 后缀,例如会名为 taosd.exe ): ```bash ./build/bin/taosd -c test/cfg diff --git a/README.md b/README.md index 78f902babe240b76b82d2b77b687f0de15ff6ccd..2dea05f09d268b0d78de15ab98f3584df055c353 100644 --- a/README.md +++ b/README.md @@ -110,7 +110,13 @@ mkdir debug && cd debug cmake .. && cmake --build . ``` -TDengine build script can detect the host machine's architecture on X86-64, X86, arm64 and arm32 platform. +You can use Jemalloc as memory allocator instead of glibc: +``` +apt install autoconf +cmake .. -DJEMALLOC_ENABLED=true +``` + +TDengine build script can detect the host machine's architecture on X86-64, X86, arm64, arm32 and mips64 platform. You can also specify CPUTYPE option like aarch64 or aarch32 too if the detection result is not correct: aarch64: @@ -123,13 +129,18 @@ aarch32: cmake .. -DCPUTYPE=aarch32 && cmake --build . ``` +mips64: +```bash +cmake .. -DCPUTYPE=mips64 && cmake --build . +``` + ### On Windows platform If you use the Visual Studio 2013, please open a command window by executing "cmd.exe". -Please specify "x86_amd64" for 64 bits Windows or specify "x86" is for 32 bits Windows when you execute vcvarsall.bat. +Please specify "amd64" for 64 bits Windows or specify "x86" is for 32 bits Windows when you execute vcvarsall.bat. ```cmd mkdir debug && cd debug -"C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\vcvarsall.bat" < x86_amd64 | x86 > +"C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\vcvarsall.bat" < amd64 | x86 > cmake .. -G "NMake Makefiles" nmake ``` @@ -164,7 +175,7 @@ cmake .. && cmake --build . # Installing -After building successfully, TDengine can be installed by: +After building successfully, TDengine can be installed by: (On Windows platform, the following command should be `nmake install`) ```bash sudo make install ``` @@ -186,7 +197,7 @@ If TDengine shell connects the server successfully, welcome messages and version ## Quick Run -If you don't want to run TDengine as a service, you can run it in current shell. For example, to quickly start a TDengine server after building, run the command below in terminal: +If you don't want to run TDengine as a service, you can run it in current shell. 
For example, to quickly start a TDengine server after building, run the command below in terminal: (We take Linux as an example, command on Windows will be `taosd.exe`) ```bash ./build/bin/taosd -c test/cfg ``` diff --git a/cmake/define.inc b/cmake/define.inc index 0d2887c823a47793f1847a72fabe91ecef493781..6c466fee026097b0bdeb89c7a4fc54fc382c2726 100755 --- a/cmake/define.inc +++ b/cmake/define.inc @@ -41,6 +41,10 @@ IF (TD_POWER) ADD_DEFINITIONS(-D_TD_POWER_) ENDIF () +IF (TD_TQ) + ADD_DEFINITIONS(-D_TD_TQ_) +ENDIF () + IF (TD_MEM_CHECK) ADD_DEFINITIONS(-DTAOS_MEM_CHECK) ENDIF () @@ -79,6 +83,8 @@ IF (TD_ARM_64) ADD_DEFINITIONS(-DUSE_LIBICONV) MESSAGE(STATUS "arm64 is defined") SET(COMMON_FLAGS "-Wall -Werror -fPIC -fsigned-char -fpack-struct=8 -D_FILE_OFFSET_BITS=64 -D_LARGE_FILE") + + INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/deps/lua/src) ENDIF () IF (TD_ARM_32) @@ -87,6 +93,8 @@ IF (TD_ARM_32) ADD_DEFINITIONS(-DUSE_LIBICONV) MESSAGE(STATUS "arm32 is defined") SET(COMMON_FLAGS "-Wall -Werror -fPIC -fsigned-char -fpack-struct=8 -D_FILE_OFFSET_BITS=64 -D_LARGE_FILE -Wno-pointer-to-int-cast -Wno-int-to-pointer-cast -Wno-incompatible-pointer-types ") + + INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/deps/lua/src) ENDIF () IF (TD_MIPS_64) @@ -139,6 +147,7 @@ IF (TD_LINUX) INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/deps/cJson/inc) INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/deps/lz4/inc) + INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/deps/lua/src) ENDIF () IF (TD_DARWIN_64) @@ -160,6 +169,7 @@ IF (TD_DARWIN_64) SET(RELEASE_FLAGS "-Og") INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/deps/cJson/inc) INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/deps/lz4/inc) + INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/deps/lua/src) ENDIF () IF (TD_WINDOWS) @@ -174,12 +184,15 @@ IF (TD_WINDOWS) IF (MSVC AND (MSVC_VERSION GREATER_EQUAL 1900)) SET(COMMON_FLAGS "${COMMON_FLAGS} /Wv:18") ENDIF () + IF (TD_MEMORY_SANITIZER) + MESSAGE("memory sanitizer detected as true") SET(DEBUG_FLAGS "/fsanitize=address /Zi /W3 /GL") ELSE () + MESSAGE("memory sanitizer detected as false") SET(DEBUG_FLAGS "/Zi /W3 /GL") ENDIF () - SET(RELEASE_FLAGS "/W0 /O3 /GL") + SET(RELEASE_FLAGS "/W0 /O2 /GL") # MSVC only support O2 ENDIF () INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/deps/pthread) @@ -187,6 +200,7 @@ IF (TD_WINDOWS) INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/deps/regex) INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/deps/wepoll/inc) INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/deps/MsvcLibX/include) + INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/deps/lua/src) ENDIF () IF (TD_WINDOWS_64) @@ -202,6 +216,10 @@ IF (TD_WINDOWS_32) MESSAGE(STATUS "windows32 is defined") ENDIF () +IF (TD_LINUX) + SET(COMMON_FLAGS "${COMMON_FLAGS} -pipe -Wshadow") +ENDIF () + INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/inc) INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/os/inc) INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/util/inc) diff --git a/cmake/input.inc b/cmake/input.inc index 16ffdc2f47ff8321c4b377a06cf952373d558d55..9d716e1e7345955f7b6b844c85ace7e7bd5c6080 100755 --- a/cmake/input.inc +++ b/cmake/input.inc @@ -46,6 +46,9 @@ ENDIF () IF (${DBNAME} MATCHES "power") SET(TD_POWER TRUE) MESSAGE(STATUS "power is true") +ELSEIF (${DBNAME} MATCHES "tq") + SET(TD_TQ TRUE) + MESSAGE(STATUS "tq is true") ENDIF () IF (${DLLTYPE} MATCHES "go") @@ -73,6 +76,11 @@ IF (${RANDOM_NETWORK_FAIL} MATCHES "true") MESSAGE(STATUS "build with random-network-fail enabled") ENDIF () +IF (${JEMALLOC_ENABLED} MATCHES "true") + SET(TD_JEMALLOC_ENABLED TRUE) + MESSAGE(STATUS "build with jemalloc enabled") +ENDIF () + SET(TD_BUILD_JDBC 
TRUE) IF (${BUILD_JDBC} MATCHES "false") @@ -83,3 +91,12 @@ SET(TD_MEMORY_SANITIZER FALSE) IF (${MEMORY_SANITIZER} MATCHES "true") SET(TD_MEMORY_SANITIZER TRUE) ENDIF () + +IF (${TSZ_ENABLED} MATCHES "true") + # define add + MESSAGE(STATUS "build with TSZ enabled") + ADD_DEFINITIONS(-DTD_TSZ) + set(VAR_TSZ "TSZ" CACHE INTERNAL "global variant tsz" ) +ELSE() + set(VAR_TSZ "" CACHE INTERNAL "global variant empty" ) +ENDIF() diff --git a/cmake/platform.inc b/cmake/platform.inc index 82ff27a44dbff38b87e8304f978b8f34e204623f..a78082a1fc62a8ad66c54dcf005e3e15edf5f5f0 100755 --- a/cmake/platform.inc +++ b/cmake/platform.inc @@ -157,5 +157,5 @@ ELSEIF (${OSTYPE} MATCHES "Alpine") MESSAGE(STATUS "input osType: Alpine") SET(TD_APLHINE TRUE) ELSE () - MESSAGE(STATUS "input osType unknown: " ${OSTYPE}) + MESSAGE(STATUS "The user specified osType is unknown: " ${OSTYPE}) ENDIF () diff --git a/cmake/version.inc b/cmake/version.inc index 3bb06bfcf2c6fb3aaeec9bcf3fa138260192a5ea..7c0a824c9c39c6760b3e2408d969048983430811 100755 --- a/cmake/version.inc +++ b/cmake/version.inc @@ -4,7 +4,7 @@ PROJECT(TDengine) IF (DEFINED VERNUMBER) SET(TD_VER_NUMBER ${VERNUMBER}) ELSE () - SET(TD_VER_NUMBER "2.0.20.10") + SET(TD_VER_NUMBER "2.1.5.0") ENDIF () IF (DEFINED VERCOMPATIBLE) diff --git a/deps/CMakeLists.txt b/deps/CMakeLists.txt index eb22459d342da5f726d8688a74b4a5efde2ac5ec..516c752bd101f26f04c3986ed50edd55121c5a40 100644 --- a/deps/CMakeLists.txt +++ b/deps/CMakeLists.txt @@ -15,7 +15,6 @@ ADD_SUBDIRECTORY(cJson) ADD_SUBDIRECTORY(wepoll) ADD_SUBDIRECTORY(MsvcLibX) ADD_SUBDIRECTORY(rmonotonic) - ADD_SUBDIRECTORY(lua) IF (TD_LINUX AND TD_MQTT) @@ -38,3 +37,7 @@ IF (TD_LINUX_64 AND JEMALLOC_ENABLED) BUILD_COMMAND ${MAKE} ) ENDIF () + +IF (${TSZ_ENABLED} MATCHES "true") + ADD_SUBDIRECTORY(TSZ) +ENDIF() \ No newline at end of file diff --git a/deps/TSZ b/deps/TSZ new file mode 160000 index 0000000000000000000000000000000000000000..0ca5b15a8eac40327dd737be52c926fa5675712c --- /dev/null +++ b/deps/TSZ @@ -0,0 +1 @@ +Subproject commit 0ca5b15a8eac40327dd737be52c926fa5675712c diff --git a/deps/rmonotonic/src/monotonic.c b/deps/rmonotonic/src/monotonic.c index 1470f91b56c79b4ee2d8429ecf58fc365d03e737..c6d2df9097ce0d435fb9dd1ec42952dd37c10de9 100644 --- a/deps/rmonotonic/src/monotonic.c +++ b/deps/rmonotonic/src/monotonic.c @@ -36,6 +36,15 @@ static char monotonic_info_string[32]; static long mono_ticksPerMicrosecond = 0; +#ifdef _TD_NINGSI_60 +// implement __rdtsc in ningsi60 +uint64_t __rdtsc(){ + unsigned int lo,hi; + __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); + return ((uint64_t)hi << 32) | lo; +} +#endif + static monotime getMonotonicUs_x86() { return __rdtsc() / mono_ticksPerMicrosecond; } diff --git a/documentation20/cn/00.index/docs.md b/documentation20/cn/00.index/docs.md index aba10a14e327ff104eb997b1ad6af29e3de6cad1..18bdc15d30430516c3ae6c847fc448477003dd66 100644 --- a/documentation20/cn/00.index/docs.md +++ b/documentation20/cn/00.index/docs.md @@ -15,6 +15,7 @@ TDengine是一个高效的存储、查询、分析时序大数据的平台,专 * [命令行程序TAOS](/getting-started#console):访问TDengine的简便方式 * [极速体验](/getting-started#demo):运行示例程序,快速体验高效的数据插入、查询 * [支持平台列表](/getting-started#platforms):TDengine服务器和客户端支持的平台列表 +* [Kubernetes部署](https://taosdata.github.io/TDengine-Operator/zh/index.html):TDengine在Kubernetes环境进行部署的详细说明 ## [整体架构](/architecture) @@ -41,7 +42,7 @@ TDengine是一个高效的存储、查询、分析时序大数据的平台,专 * [数据写入](/taos-sql#insert):支持单表单条、多条、多表多条写入,支持历史数据写入 * [数据查询](/taos-sql#select):支持时间段、值过滤、排序、查询结果手动分页等 * 
[SQL函数](/taos-sql#functions):支持各种聚合函数、选择函数、计算函数,如avg, min, diff等 -* [时间维度聚合](/taos-sql#aggregation):将表中数据按照时间段进行切割后聚合,降维处理 +* [窗口切分聚合](/taos-sql#aggregation):将表中数据按照时间段等方式进行切割后聚合,降维处理 * [边界限制](/taos-sql#limitation):库、表、SQL等边界限制条件 * [错误码](/taos-sql/error-code):TDengine 2.0 错误码以及对应的十进制码 @@ -62,7 +63,7 @@ TDengine是一个高效的存储、查询、分析时序大数据的平台,专 ## [高级功能](/advanced-features) * [连续查询(Continuous Query)](/advanced-features#continuous-query):基于滑动窗口,定时自动的对数据流进行查询计算 -* [数据订阅(Publisher/Subscriber)](/advanced-features#subscribe):象典型的消息队列,应用可订阅接收到的最新数据 +* [数据订阅(Publisher/Subscriber)](/advanced-features#subscribe):类似典型的消息队列,应用可订阅接收到的最新数据 * [缓存(Cache)](/advanced-features#cache):每个设备最新的数据都会缓存在内存中,可快速获取 * [报警监测](/advanced-features#alert):根据配置规则,自动监测超限行为数据,并主动推送 @@ -80,7 +81,7 @@ TDengine是一个高效的存储、查询、分析时序大数据的平台,专 ## [与其他工具的连接](/connections) * [Grafana](/connections#grafana):获取并可视化保存在TDengine的数据 -* [Matlab](/connections#matlab):通过配置Matlab的JDBC数据源访问保存在TDengine的数据 +* [MATLAB](/connections#matlab):通过配置MATLAB的JDBC数据源访问保存在TDengine的数据 * [R](/connections#r):通过配置R的JDBC数据源访问保存在TDengine的数据 * [IDEA Database](https://www.taosdata.com/blog/2020/08/27/1767.html):通过IDEA 数据库管理工具可视化使用 TDengine @@ -105,6 +106,7 @@ TDengine是一个高效的存储、查询、分析时序大数据的平台,专 * [数据导入](/administrator#import):可按脚本文件导入,也可按数据文件导入 * [数据导出](/administrator#export):从shell按表导出,也可用taosdump工具做各种导出 * [系统监控](/administrator#status):检查系统现有的连接、查询、流式计算,日志和事件等 +* [性能优化](/administrator#optimize):对长期运行的系统进行维护优化,保障性能表现 * [文件目录结构](/administrator#directories):TDengine数据文件、配置文件等所在目录 * [参数限制与保留关键字](/administrator#keywords):TDengine的参数限制与保留关键字列表 @@ -117,9 +119,9 @@ TDengine是一个高效的存储、查询、分析时序大数据的平台,专 ## 常用工具 * [TDengine样例导入工具](https://www.taosdata.com/blog/2020/01/18/1166.html) -* [TDengine性能对比测试工具](https://www.taosdata.com/blog/2020/01/18/1166.html) +* [TDengine写入性能测试工具](https://www.taosdata.com/blog/2020/01/18/1166.html) * [IDEA数据库管理工具可视化使用TDengine](https://www.taosdata.com/blog/2020/08/27/1767.html) -* [基于eletron开发的跨平台TDengine图形化管理工具](https://github.com/skye0207/TDengineGUI) +* [基于Electron开发的跨平台TDengine图形化管理工具](https://github.com/skye0207/TDengineGUI) * [DataX,支持TDengine的离线数据采集/同步工具](https://github.com/wgzhao/DataX)(文档:[读取插件](https://github.com/wgzhao/DataX/blob/master/docs/src/main/sphinx/reader/tdenginereader.md)、[写入插件](https://github.com/wgzhao/DataX/blob/master/docs/src/main/sphinx/writer/tdenginewriter.md)) ## TDengine与其他数据库的对比测试 diff --git a/documentation20/cn/01.evaluation/docs.md b/documentation20/cn/01.evaluation/docs.md index 0ae2106ff2a63696dc8bbc51d25bbf5e811ef561..7f70ccec5681ffd751cd1372d9c0926bf3f3beda 100644 --- a/documentation20/cn/01.evaluation/docs.md +++ b/documentation20/cn/01.evaluation/docs.md @@ -9,8 +9,8 @@ TDengine的模块之一是时序数据库。但除此之外,为减少研发的 * __10倍以上的性能提升__:定义了创新的数据存储结构,单核每秒能处理至少2万次请求,插入数百万个数据点,读出一千万以上数据点,比现有通用数据库快十倍以上。 * __硬件或云服务成本降至1/5__:由于超强性能,计算资源不到通用大数据方案的1/5;通过列式存储和先进的压缩算法,存储空间不到通用数据库的1/10。 * __全栈时序数据处理引擎__:将数据库、消息队列、缓存、流式计算等功能融为一体,应用无需再集成Kafka/Redis/HBase/Spark/HDFS等软件,大幅降低应用开发和维护的复杂度成本。 -* __强大的分析功能__:无论是十年前还是一秒钟前的数据,指定时间范围即可查询。数据可在时间轴上或多个设备上进行聚合。即席查询可通过Shell, Python, R, Matlab随时进行。 -* __与第三方工具无缝连接__:不用一行代码,即可与Telegraf, Grafana, EMQ, HiveMQ, Prometheus, Matlab, R等集成。后续将支持OPC, Hadoop, Spark等, BI工具也将无缝连接。 +* __强大的分析功能__:无论是十年前还是一秒钟前的数据,指定时间范围即可查询。数据可在时间轴上或多个设备上进行聚合。即席查询可通过Shell, Python, R, MATLAB随时进行。 +* __与第三方工具无缝连接__:不用一行代码,即可与Telegraf, Grafana, EMQ, HiveMQ, Prometheus, MATLAB, R等集成。后续将支持OPC, Hadoop, Spark等, BI工具也将无缝连接。 * __零运维成本、零学习成本__:安装集群简单快捷,无需分库分表,实时备份。类似标准SQL,支持RESTful, 支持Python/Java/C/C++/C#/Go/Node.js, 与MySQL相似,零学习成本。 
采用TDengine,可将典型的物联网、车联网、工业互联网大数据平台的总拥有成本大幅降低。但需要指出的是,因充分利用了物联网时序数据的特点,它无法用来处理网络爬虫、微博、微信、电商、ERP、CRM等通用型数据。

diff --git a/documentation20/cn/02.getting-started/01.docker/docs.md b/documentation20/cn/02.getting-started/01.docker/docs.md
new file mode 100644
index 0000000000000000000000000000000000000000..30803d977704606b042c589b96b649d99a850106
--- /dev/null
+++ b/documentation20/cn/02.getting-started/01.docker/docs.md
@@ -0,0 +1,211 @@
+# 通过 Docker 快速体验 TDengine
+
+虽然并不推荐在生产环境中通过 Docker 来部署 TDengine 服务,但 Docker 工具能够很好地屏蔽底层操作系统的环境差异,很适合在开发测试或初次体验时用来安装和运行 TDengine。特别是,借助 Docker,能够比较方便地在 Mac OS X 和 Windows 系统上尝试 TDengine,而无需安装虚拟机或额外租用 Linux 服务器。
+
+下文通过 Step by Step 风格的介绍,讲解如何通过 Docker 快速建立 TDengine 的单节点运行环境,以支持开发和测试。
+
+## 下载 Docker
+
+Docker 工具自身的下载请参考 [Docker官网文档](https://docs.docker.com/get-docker/)。
+
+安装完毕后可以在命令行终端查看 Docker 版本。如果版本号正常输出,则说明 Docker 环境已经安装成功。
+
+```bash
+$ docker -v
+Docker version 20.10.5, build 55c4c88
+```
+
+## 在 Docker 容器中运行 TDengine
+
+1,使用命令拉取 TDengine 镜像,并使它在后台运行。
+
+```bash
+$ docker run -d tdengine/tdengine
+cdf548465318c6fc2ad97813f89cc60006393392401cae58a27b15ca9171f316
+```
+
+- **docker run**:通过 Docker 运行一个容器。
+- **-d**:让容器在后台运行。
+- **tdengine/tdengine**:拉取的 TDengine 官方发布的应用镜像。
+- **cdf548465318c6fc2ad97813f89cc60006393392401cae58a27b15ca9171f316**:这个返回的长字符串是容器 ID,我们可以通过容器 ID 来查看对应的容器。
+
+2,确认容器是否已经正确运行。
+
+```bash
+$ docker ps
+CONTAINER ID   IMAGE               COMMAND   CREATED          STATUS          ···
+cdf548465318   tdengine/tdengine   "taosd"   14 minutes ago   Up 14 minutes   ···
+```
+
+- **docker ps**:列出所有正在运行状态的容器信息。
+- **CONTAINER ID**:容器 ID。
+- **IMAGE**:使用的镜像。
+- **COMMAND**:启动容器时运行的命令。
+- **CREATED**:容器创建时间。
+- **STATUS**:容器状态。UP 表示运行中。
+
+3,进入 Docker 容器内,使用 TDengine。
+
+```bash
+$ docker exec -it cdf548465318 /bin/bash
+root@cdf548465318:~/TDengine-server-2.0.13.0#
+```
+
+- **docker exec**:通过 docker exec 命令进入容器,如果退出,容器不会停止。
+- **-i**:进入交互模式。
+- **-t**:指定一个终端。
+- **cdf548465318**:容器 ID,需要根据 docker ps 指令返回的值进行修改。
+- **/bin/bash**:载入容器后运行 bash 来进行交互。
+
+4,进入容器后,执行 taos shell 客户端程序。
+
+```bash
+root@cdf548465318:~/TDengine-server-2.0.13.0# taos
+
+Welcome to the TDengine shell from Linux, Client Version:2.0.13.0
+Copyright (c) 2020 by TAOS Data, Inc. All rights reserved.
+
+taos>
+```
+
+TDengine 终端成功连接服务端,打印出了欢迎消息和版本信息。如果失败,会有错误信息打印出来。
+
+在 TDengine 终端中,可以通过 SQL 命令来创建/删除数据库、表、超级表等,并可以进行插入和查询操作。具体可以参考 [TAOS SQL 说明文档](https://www.taosdata.com/cn/documentation/taos-sql)。
+
+## 通过 taosdemo 进一步了解 TDengine
+
+1,接上面的步骤,先退出 TDengine 终端程序。
+
+```bash
+taos> q
+root@cdf548465318:~/TDengine-server-2.0.13.0#
+```
+
+2,在命令行界面执行 taosdemo。
+
+```bash
+root@cdf548465318:~/TDengine-server-2.0.13.0# taosdemo
+###################################################################
+# Server IP:                         localhost:0
+# User:                              root
+# Password:                          taosdata
+# Use metric:                        true
+# Datatype of Columns:               int int int int int int int float
+# Binary Length(If applicable):      -1
+# Number of Columns per record:      3
+# Number of Threads:                 10
+# Number of Tables:                  10000
+# Number of Data per Table:          100000
+# Records/Request:                   1000
+# Database name:                     test
+# Table prefix:                      t
+# Delete method:                     0
+# Test time:                         2021-04-13 02:05:20
+###################################################################
+```
+
+回车后,该命令将新建一个数据库 test,并且自动创建一张超级表 meters,并以超级表 meters 为模板创建了 1 万张表,表名从 "t0" 到 "t9999"。每张表有 10 万条记录,每条记录有 f1,f2,f3 三个字段,时间戳 ts 字段从 "2017-07-14 10:40:00 000" 到 "2017-07-14 10:41:39 999"。每张表带有 areaid 和 loc 两个标签 TAG,areaid 被设置为 1 到 10,loc 被设置为 "beijing" 或 "shanghai"。
+
+3,进入 TDengine 终端,查看 taosdemo 生成的数据。
+
+- **进入命令行。**
+
+```bash
+root@cdf548465318:~/TDengine-server-2.0.13.0# taos
+
+Welcome to the TDengine shell from Linux, Client Version:2.0.13.0
+Copyright (c) 2020 by TAOS Data, Inc. All rights reserved.
+
+taos>
+```
+
+- **查看数据库。**
+
+```bash
+taos> show databases;
+  name   |      created_time       | ntables | vgroups | ···
+  test   | 2021-04-13 02:14:15.950 |   10000 |       6 | ···
+  log    | 2021-04-12 09:36:37.549 |       4 |       1 | ···
+
+```
+
+- **查看超级表。**
+
+```bash
+taos> use test;
+Database changed.
+
+taos> show stables;
+              name              |      created_time       | columns |  tags  |   tables    |
+=====================================================================================
+ meters                         | 2021-04-13 02:14:15.955 |       4 |      2 |       10000 |
+Query OK, 1 row(s) in set (0.001737s)
+
+```
+
+- **查看表,限制输出十条。**
+
+```bash
+taos> select * from test.t0 limit 10;
+           ts            |     f1      |     f2      |     f3      |
+====================================================================
+ 2017-07-14 02:40:01.000 |           3 |           9 |           0 |
+ 2017-07-14 02:40:02.000 |           0 |           1 |           2 |
+ 2017-07-14 02:40:03.000 |           7 |           2 |           3 |
+ 2017-07-14 02:40:04.000 |           9 |           4 |           5 |
+ 2017-07-14 02:40:05.000 |           1 |           2 |           5 |
+ 2017-07-14 02:40:06.000 |           6 |           3 |           2 |
+ 2017-07-14 02:40:07.000 |           4 |           7 |           8 |
+ 2017-07-14 02:40:08.000 |           4 |           6 |           6 |
+ 2017-07-14 02:40:09.000 |           5 |           7 |           7 |
+ 2017-07-14 02:40:10.000 |           1 |           5 |           0 |
+Query OK, 10 row(s) in set (0.003638s)
+
+```
+
+- **查看 t0 表的标签值。**
+
+```bash
+taos> select areaid, loc from test.t0;
+ areaid |   loc    |
+===========================
+     10 | shanghai |
+Query OK, 1 row(s) in set (0.002904s)
+
+```
+
+## 停止正在 Docker 中运行的 TDengine 服务
+
+```bash
+$ docker stop cdf548465318
+cdf548465318
+```
+
+- **docker stop**:通过 docker stop 停止指定的正在运行中的 docker 镜像。
+- **cdf548465318**:容器 ID,根据 docker ps 指令返回的结果进行修改。
+
+## 编程开发时连接在 Docker 中的 TDengine
+
+从 Docker 之外连接使用在 Docker 容器内运行的 TDengine 服务,有以下两个思路:
+
+1,通过端口映射(-p),将容器内部开放的网络端口映射到宿主机的指定端口上。通过挂载本地目录(-v),可以实现宿主机与容器内部的数据同步,防止容器删除后,数据丢失。
+
+```bash
+$ docker run -d -v /etc/taos:/etc/taos -p 6041:6041 tdengine/tdengine
+526aa188da767ae94b244226a2b2eec2b5f17dd8eff592893d9ec0cd0f3a1ccd
+
+$ curl -u root:taosdata -d 'show databases' 127.0.0.1:6041/rest/sql
+{"status":"succ","head":["name","created_time","ntables","vgroups","replica","quorum","days","keep1,keep2,keep(D)","cache(MB)","blocks","minrows","maxrows","wallevel","fsync","comp","precision","status"],"data":[],"rows":0}
+```
+
+- 第一条命令,启动一个运行了 TDengine 的 docker 容器,并且将容器的 6041 端口映射到宿主机的 6041 端口上。
+- 第二条命令,通过 RESTful 接口访问 TDengine,这时连接的是本机的 6041 端口,可见连接成功。
+
+注意:在这个示例中,出于方便性考虑,只映射了 RESTful 需要的 6041 端口。如果希望以非 RESTful 方式连接 TDengine 服务,则需要映射从 6030 开始的共 11 个端口(完整的端口情况请参见 [TDengine 2.0 端口说明](https://www.taosdata.com/cn/documentation/faq#port))。在例子中,挂载本地目录也只是处理了配置文件所在的 /etc/taos 目录,而没有挂载数据存储目录。
+
+2,直接通过 exec 命令,进入到 docker 容器中去做开发。也即,把程序代码放在 TDengine 服务端所在的同一个 Docker 容器中,连接容器本地的 TDengine 服务。
+
+```bash
+$ docker exec -it 526aa188da /bin/bash
+```
+
diff --git a/documentation20/cn/02.getting-started/docs.md b/documentation20/cn/02.getting-started/docs.md
index b46322cef28c7c3e78c260680dcc501684e6844a..ab10b28fd3950bfa10e47113696de0829b2da74d 100644
--- a/documentation20/cn/02.getting-started/docs.md
+++ b/documentation20/cn/02.getting-started/docs.md
@@ -2,27 +2,29 @@

## 快捷安装

-TDengine软件分为服务器、客户端和报警模块三部分,目前2.0版服务器仅能在Linux系统上安装和运行,后续会支持Windows、mac OS等系统。客户端可以在Windows或Linux上安装和运行。任何OS的应用也可以选择RESTful接口连接服务器taosd。CPU支持X64/ARM64/MIPS64/Alpha64,后续会支持ARM32、RISC-V等CPU架构。用户可根据需求选择通过[源码](https://www.taosdata.com/cn/getting-started/#通过源码安装)或者[安装包](https://www.taosdata.com/cn/getting-started/#通过安装包安装)来安装。
+TDengine 软件分为服务器、客户端和报警模块三部分,目前 2.0 版服务器仅能在 Linux 系统上安装和运行,后续会支持 Windows、Mac OS 等系统。客户端可以在 Windows 或 Linux 上安装和运行。任何 OS 的应用也可以选择 RESTful 接口连接服务器 taosd。CPU 支持 X64/ARM64/MIPS64/Alpha64,后续会支持 ARM32、RISC-V 等 CPU 架构。用户可根据需求选择通过 [源码](https://www.taosdata.com/cn/getting-started/#通过源码安装) 或者 [安装包](https://www.taosdata.com/cn/getting-started/#通过安装包安装) 来安装。

### 通过源码安装

-请参考我们的[TDengine github主页](https://github.com/taosdata/TDengine)下载源码并安装.
+请参考我们的 [TDengine github 主页](https://github.com/taosdata/TDengine) 下载源码并安装. -### 通过Docker容器运行 +### 通过 Docker 容器运行 -请参考[TDengine官方Docker镜像的发布、下载和使用](https://www.taosdata.com/blog/2020/05/13/1509.html) +暂时不建议生产环境采用 Docker 来部署 TDengine 的客户端或服务端,但在开发环境下或初次尝试时,使用 Docker 方式部署是十分方便的。特别是,利用 Docker,可以方便地在 Mac OS X 和 Windows 环境下尝试 TDengine。 + +详细操作方法请参照 [通过 Docker 快速体验 TDengine](https://www.taosdata.com/cn/documentation/getting-started/docker)。 ### 通过安装包安装 -TDengine的安装非常简单,从下载到安装成功仅仅只要几秒钟。服务端安装包包含客户端和连接器,我们提供三种安装包,您可以根据需要选择: +TDengine 的安装非常简单,从下载到安装成功仅仅只要几秒钟。服务端安装包包含客户端和连接器,我们提供三种安装包,您可以根据需要选择: -安装包下载在[这里](https://www.taosdata.com/cn/getting-started/#通过安装包安装)。 +安装包下载在 [这里](https://www.taosdata.com/cn/getting-started/#通过安装包安装)。 -具体的安装过程,请参见[TDengine多种安装包的安装和卸载](https://www.taosdata.com/blog/2019/08/09/566.html)以及[视频教程](https://www.taosdata.com/blog/2020/11/11/1941.html)。 +具体的安装过程,请参见 [TDengine 多种安装包的安装和卸载](https://www.taosdata.com/blog/2019/08/09/566.html) 以及 [视频教程](https://www.taosdata.com/blog/2020/11/11/1941.html)。 ## 轻松启动 -安装成功后,用户可使用`systemctl`命令来启动TDengine的服务进程。 +安装成功后,用户可使用 `systemctl` 命令来启动 TDengine 的服务进程。 ```bash $ systemctl start taosd @@ -33,38 +35,39 @@ $ systemctl start taosd $ systemctl status taosd ``` -如果TDengine服务正常工作,那么您可以通过TDengine的命令行程序`taos`来访问并体验TDengine。 +如果 TDengine 服务正常工作,那么您可以通过 TDengine 的命令行程序 `taos` 来访问并体验 TDengine。 **注意:** -- systemctl命令需要 _root_ 权限来运行,如果您非 _root_ 用户,请在命令前添加 sudo -- 为更好的获得产品反馈,改善产品,TDengine会采集基本的使用信息,但您可以修改系统配置文件taos.cfg里的配置参数telemetryReporting, 将其设为0,就可将其关闭。 -- TDengine采用FQDN(一般就是hostname)作为节点的ID,为保证正常运行,需要给运行taosd的服务器配置好hostname,在客户端应用运行的机器配置好DNS服务或hosts文件,保证FQDN能够解析。 +- systemctl 命令需要 _root_ 权限来运行,如果您非 _root_ 用户,请在命令前添加 sudo 。 +- 为更好的获得产品反馈,改善产品,TDengine 会采集基本的使用信息,但您可以修改系统配置文件 taos.cfg 里的配置参数 telemetryReporting, 将其设为 0,就可将其关闭。 +- TDengine 采用 FQDN (一般就是 hostname )作为节点的 ID,为保证正常运行,需要给运行 taosd 的服务器配置好 hostname,在客户端应用运行的机器配置好 DNS 服务或 hosts 文件,保证 FQDN 能够解析。 +- `systemctl stop taosd` 指令在执行后并不会马上停止 TDengine 服务,而是会等待系统中必要的落盘工作正常完成。在数据量很大的情况下,这可能会消耗较长时间。 -* TDengine 支持在使用[`systemd`](https://en.wikipedia.org/wiki/Systemd)做进程服务管理的linux系统上安装,用`which systemctl`命令来检测系统中是否存在`systemd`包: +* TDengine 支持在使用 [`systemd`](https://en.wikipedia.org/wiki/Systemd) 做进程服务管理的 linux 系统上安装,用 `which systemctl` 命令来检测系统中是否存在 `systemd` 包: ```bash $ which systemctl ``` - 如果系统中不支持systemd,也可以用手动运行 /usr/local/taos/bin/taosd 方式启动 TDengine 服务。 + 如果系统中不支持 systemd,也可以用手动运行 /usr/local/taos/bin/taosd 方式启动 TDengine 服务。 -## TDengine命令行程序 +## TDengine 命令行程序 -执行TDengine命令行程序,您只要在Linux终端执行`taos`即可。 +执行 TDengine 命令行程序,您只要在 Linux 终端执行 `taos` 即可。 ```bash $ taos ``` -如果TDengine终端连接服务成功,将会打印出欢迎消息和版本信息。如果失败,则会打印错误消息出来(请参考[FAQ](https://www.taosdata.com/cn/documentation/faq/)来解决终端连接服务端失败的问题)。TDengine终端的提示符号如下: +如果 TDengine 终端连接服务成功,将会打印出欢迎消息和版本信息。如果失败,则会打印错误消息出来(请参考 [FAQ](https://www.taosdata.com/cn/documentation/faq/) 来解决终端连接服务端失败的问题)。TDengine 终端的提示符号如下: ```cmd taos> ``` -在TDengine终端中,用户可以通过SQL命令来创建/删除数据库、表等,并进行插入查询操作。在终端中运行的SQL语句需要以分号结束来运行。示例: +在 TDengine 终端中,用户可以通过 SQL 命令来创建/删除数据库、表等,并进行插入查询操作。在终端中运行的 SQL 语句需要以分号结束来运行。示例: ```mysql create database demo; @@ -73,24 +76,24 @@ create table t (ts timestamp, speed int); insert into t values ('2019-07-15 00:00:00', 10); insert into t values ('2019-07-15 01:00:00', 20); select * from t; - ts | speed | -=================================== - 19-07-15 00:00:00.000| 10| - 19-07-15 01:00:00.000| 20| -Query OK, 2 row(s) in set (0.001700s) + ts | speed | +======================================== + 2019-07-15 00:00:00.000 | 10 | + 2019-07-15 01:00:00.000 | 20 | +Query OK, 
2 row(s) in set (0.003128s) ``` -除执行SQL语句外,系统管理员还可以从TDengine终端检查系统运行状态,添加删除用户账号等。 +除执行 SQL 语句外,系统管理员还可以从 TDengine 终端检查系统运行状态,添加删除用户账号等。 ### 命令行参数 -您可通过配置命令行参数来改变TDengine终端的行为。以下为常用的几个命令行参数: +您可通过配置命令行参数来改变 TDengine 终端的行为。以下为常用的几个命令行参数: -- -c, --config-dir: 指定配置文件目录,默认为_/etc/taos_ -- -h, --host: 指定服务的IP地址,默认为本地服务 -- -s, --commands: 在不进入终端的情况下运行TDengine命令 -- -u, -- user: 连接TDengine服务器的用户名,缺省为root -- -p, --password: 连接TDengine服务器的密码,缺省为taosdata +- -c, --config-dir: 指定配置文件目录,默认为 _/etc/taos_ +- -h, --host: 指定服务的 FQDN 地址(也可以使用 IP),默认为连接本地服务 +- -s, --commands: 在不进入终端的情况下运行 TDengine 命令 +- -u, --user: 连接 TDengine 服务器的用户名,缺省为 root +- -p, --password: 连接TDengine服务器的密码,缺省为 taosdata - -?, --help: 打印出所有命令行参数 示例: @@ -99,7 +102,7 @@ Query OK, 2 row(s) in set (0.001700s) $ taos -h 192.168.0.1 -s "use db; show tables;" ``` -### 运行SQL命令脚本 +### 运行 SQL 命令脚本 TDengine 终端可以通过 `source` 命令来运行 SQL 命令脚本. @@ -107,27 +110,27 @@ TDengine 终端可以通过 `source` 命令来运行 SQL 命令脚本. taos> source ; ``` -### Shell小技巧 +### Shell 小技巧 - 可以使用上下光标键查看历史输入的指令 -- 修改用户密码。在 shell 中使用 alter user 指令 +- 修改用户密码,在 shell 中使用 alter user 指令 - ctrl+c 中止正在进行中的查询 - 执行 `RESET QUERY CACHE` 清空本地缓存的表 schema ## TDengine 极速体验 -启动TDengine的服务,在Linux终端执行taosdemo +启动 TDengine 的服务,在 Linux 终端执行 taosdemo ```bash $ taosdemo ``` -该命令将在数据库test下面自动创建一张超级表meters,该超级表下有1万张表,表名为"t0" 到"t9999",每张表有10万条记录,每条记录有 (f1, f2, f3)三个字段,时间戳从"2017-07-14 10:40:00 000" 到"2017-07-14 10:41:39 999",每张表带有标签areaid和loc, areaid被设置为1到10, loc被设置为"beijing"或者“shanghai"。 +该命令将在数据库 test 下面自动创建一张超级表 meters,该超级表下有 1 万张表,表名为 "d0" 到 "d9999",每张表有 1 万条记录,每条记录有 (ts, current, voltage, phase) 四个字段,时间戳从 "2017-07-14 10:40:00 000" 到 "2017-07-14 10:40:09 999",每张表带有标签 location 和 groupdId,groupdId 被设置为 1 到 10, location 被设置为 "beijing" 或者 "shanghai"。 -执行这条命令大概需要10分钟,最后共插入10亿条记录。 +执行这条命令大概需要几分钟,最后共插入 1 亿条记录。 -在TDengine客户端输入查询命令,体验查询速度。 +在 TDengine 客户端输入查询命令,体验查询速度。 - 查询超级表下记录总条数: @@ -135,72 +138,64 @@ $ taosdemo taos> select count(*) from test.meters; ``` -- 查询10亿条记录的平均值、最大值、最小值等: +- 查询 1 亿条记录的平均值、最大值、最小值等: ```mysql -taos> select avg(f1), max(f2), min(f3) from test.meters; +taos> select avg(current), max(voltage), min(phase) from test.meters; ``` -- 查询loc="beijing"的记录总条数: +- 查询 location="beijing" 的记录总条数: ```mysql -taos> select count(*) from test.meters where loc="beijing"; +taos> select count(*) from test.meters where location="beijing"; ``` -- 查询areaid=10的所有记录的平均值、最大值、最小值等: +- 查询 groupdId=10 的所有记录的平均值、最大值、最小值等: ```mysql -taos> select avg(f1), max(f2), min(f3) from test.meters where areaid=10; +taos> select avg(current), max(voltage), min(phase) from test.meters where groupdId=10; ``` -- 对表t10按10s进行平均值、最大值和最小值聚合统计: +- 对表 d10 按 10s 进行平均值、最大值和最小值聚合统计: ```mysql -taos> select avg(f1), max(f2), min(f3) from test.t10 interval(10s); +taos> select avg(current), max(voltage), min(phase) from test.d10 interval(10s); ``` -**Note:** taosdemo命令本身带有很多选项,配置表的数目、记录条数等等,请执行 `taosdemo --help`详细列出。您可以设置不同参数进行体验。 +**Note:** taosdemo 命令本身带有很多选项,配置表的数目、记录条数等等,请执行 `taosdemo --help` 详细列出。您可以设置不同参数进行体验。 ## 客户端和报警模块 -如果客户端和服务端运行在不同的电脑上,可以单独安装客户端。Linux和Windows安装包如下: - -- TDengine-client-2.0.10.0-Linux-x64.tar.gz(3.0M) -- TDengine-client-2.0.10.0-Windows-x64.exe(2.8M) -- TDengine-client-2.0.10.0-Windows-x86.exe(2.8M) - -报警模块的Linux安装包如下(请参考[报警模块的使用方法](https://github.com/taosdata/TDengine/blob/master/alert/README_cn.md)): +如果客户端和服务端运行在不同的电脑上,可以单独安装客户端。Linux 和 Windows 安装包可以在 [这里](https://www.taosdata.com/cn/getting-started/#客户端) 下载。 -- TDengine-alert-2.0.10.0-Linux-x64.tar.gz (8.1M) +报警模块的 Linux 和 Windows 安装包请在 
[所有下载链接](https://www.taosdata.com/cn/all-downloads/) 页面搜索“TDengine Alert Linux”章节或“TDengine Alert Windows”章节进行下载。使用方法请参考 [报警模块的使用方法](https://github.com/taosdata/TDengine/blob/master/alert/README_cn.md)。 ## 支持平台列表 -### TDengine服务器支持的平台列表 +### TDengine 服务器支持的平台列表 | | **CentOS 6/7/8** | **Ubuntu 16/18/20** | **Other Linux** | **统信 UOS** | **银河/中标麒麟** | **凝思 V60/V80** | **华为 EulerOS** | | -------------- | --------------------- | ------------------------ | --------------- | --------------- | ------------------------- | --------------------- | --------------------- | | X64 | ● | ● | | ○ | ● | ● | ● | -| 树莓派 ARM32 | | ● | ● | | | | | | 龙芯 MIPS64 | | | ● | | | | | -| 鲲鹏 ARM64 | | ○ | ○ | | ● | | | -| 申威 Alpha64 | | | ○ | ● | | | | +| 鲲鹏 ARM64 | | ○ | ○ | | ● | | | +| 申威 Alpha64 | | | ○ | ● | | | | | 飞腾 ARM64 | | ○ 优麒麟 | | | | | | | 海光 X64 | ● | ● | ● | ○ | ● | ● | | -| 瑞芯微 ARM64/32 | | | ○ | | | | | -| 全志 ARM64/32 | | | ○ | | | | | -| 炬力 ARM64/32 | | | ○ | | | | | -| TI ARM32 | | | ○ | | | | | -| 华为云 ARM64 | | | | | | | ● | +| 瑞芯微 ARM64 | | | ○ | | | | | +| 全志 ARM64 | | | ○ | | | | | +| 炬力 ARM64 | | | ○ | | | | | +| 华为云 ARM64 | | | | | | | ● | 注: ● 表示经过官方测试验证, ○ 表示非官方测试验证。 -### TDengine客户端和连接器支持的平台列表 +### TDengine 客户端和连接器支持的平台列表 -目前TDengine的连接器可支持的平台广泛,目前包括:X64/X86/ARM64/ARM32/MIPS/Alpha等硬件平台,以及Linux/Win64/Win32等开发环境。 +目前 TDengine 的连接器可支持的平台广泛,目前包括:X64/X86/ARM64/ARM32/MIPS/Alpha 等硬件平台,以及 Linux/Win64/Win32 等开发环境。 对照矩阵如下: @@ -217,5 +212,5 @@ taos> select avg(f1), max(f2), min(f3) from test.t10 interval(10s); 注: ● 表示经过官方测试验证, ○ 表示非官方测试验证。 -请跳转到 [连接器](https://www.taosdata.com/cn/documentation/connector)查看更详细的信息。 +请跳转到 [连接器](https://www.taosdata.com/cn/documentation/connector) 查看更详细的信息。 diff --git a/documentation20/cn/03.architecture/02.replica/docs.md b/documentation20/cn/03.architecture/02.replica/docs.md index 8e1b1e3ab1513fbeaa5b9b805263485a13483b9b..27ac7f123cdd2a56df9e65ae0fa13d1ff8faa23d 100644 --- a/documentation20/cn/03.architecture/02.replica/docs.md +++ b/documentation20/cn/03.architecture/02.replica/docs.md @@ -107,11 +107,11 @@ TDengine采取的是Master-Slave模式进行同步,与流行的RAFT一致性 ![replica-forward.png](page://images/architecture/replica-forward.png) -1. 应用对写请求做基本的合法性检查,通过,则给改请求包打上一个版本号(version, 单调递增) +1. 应用对写请求做基本的合法性检查,通过,则给该请求包打上一个版本号(version, 单调递增) 2. 应用将打上版本号的写请求封装一个WAL Head, 写入WAL(Write Ahead Log) -3. 应用调用API syncForwardToPeer,如多vnode B是slave状态,sync模块将包含WAL Head的数据包通过Forward消息发送给vnode B,否则就不转发。 +3. 应用调用API syncForwardToPeer,如果vnode B是slave状态,sync模块将包含WAL Head的数据包通过Forward消息发送给vnode B,否则就不转发。 4. vnode B收到Forward消息后,调用回调函数writeToCache, 交给应用处理 -5. vnode B应用在写入成功后,都需要调用syncAckForward通知sync模块已经写入成功。 +5. vnode B应用在写入成功后,都需要调用syncConfirmForward通知sync模块已经写入成功。 6. 如果quorum大于1,vnode B需要等待应用的回复确认,收到确认后,vnode B发送Forward Response消息给node A。 7. 如果quorum大于1,vnode A需要等待vnode B或其他副本对Forward消息的确认。 8. 如果quorum大于1,vnode A收到quorum-1条确认消息后,调用回调函数confirmForward,通知应用写入成功。 @@ -140,7 +140,7 @@ TDengine采取的是Master-Slave模式进行同步,与流行的RAFT一致性 整个数据恢复流程分为两大步骤,第一步,先恢复archived data(file), 然后恢复wal。具体流程如下: -![replica-forward.png](page://images/architecture/replica-forward.png) +![replica-restore.png](page://images/architecture/replica-restore.png) 1. 通过已经建立的TCP连接,发送sync req给master节点 2. master收到sync req后,以client的身份,向vnode B主动建立一新的专用于同步的TCP连接(syncFd) @@ -219,7 +219,7 @@ Arbitrator的程序tarbitrator.c在复制模块的同一目录, 编译整个系 不同之处: -- 选举流程不一样:Raft里任何一个节点是candidate时,主动向其他节点发出vote request, 如果超过半数回答Yes, 这个candidate就成为Leader,开始一个新的term. 
而TDengine的实现里,节点上线、离线或角色改变都会触发状态消息在节点组类传播,等节点组里状态稳定一致之后才触发选举流程,因为状态稳定一致,基于同样的状态信息,每个节点做出的决定会是一致的,一旦某个节点符合成为master的条件,无需其他节点认可,它会自动将自己设为master。TDengine里,任何一个节点检测到其他节点或自己的角色发生改变,就会给节点组内其他节点进行广播的。Raft里不存在这样的机制,因此需要投票来解决。 +- 选举流程不一样:Raft里任何一个节点是candidate时,主动向其他节点发出vote request,如果超过半数回答Yes,这个candidate就成为Leader,开始一个新的term。而TDengine的实现里,节点上线、离线或角色改变都会触发状态消息在节点组内传播,等节点组里状态稳定一致之后才触发选举流程,因为状态稳定一致,基于同样的状态信息,每个节点做出的决定会是一致的,一旦某个节点符合成为master的条件,无需其他节点认可,它会自动将自己设为master。TDengine里,任何一个节点检测到其他节点或自己的角色发生改变,就会向节点组内其他节点进行广播。Raft里不存在这样的机制,因此需要投票来解决。 - 对WAL的一条记录,Raft用term + index来做唯一标识。但TDengine只用version(类似index),在TDengine实现里,仅仅用version是完全可行的, 因为TDengine的选举机制,没有term的概念。 如果整个虚拟节点组全部宕机,重启,但不是所有虚拟节点都上线,这个时候TDengine是不会选出master的,因为未上线的节点有可能有最高version的数据。而RAFT协议,只要超过半数上线,就会选出Leader。 diff --git a/documentation20/cn/03.architecture/docs.md b/documentation20/cn/03.architecture/docs.md index 87553fa8ad9760ecdb6d1667823d336189542331..b481bea9f840ad459812f955aa76a8a7829d5b37 100644 --- a/documentation20/cn/03.architecture/docs.md +++ b/documentation20/cn/03.architecture/docs.md @@ -176,9 +176,9 @@ TDengine 分布式架构的逻辑结构图如下: **通讯方式:**TDengine系统的各个数据节点之间,以及应用驱动与各数据节点之间的通讯是通过TCP/UDP进行的。因为考虑到物联网场景,数据写入的包一般不大,因此TDengine 除采用TCP做传输之外,还采用UDP方式,因为UDP 更加高效,而且不受连接数的限制。TDengine实现了自己的超时、重传、确认等机制,以确保UDP的可靠传输。对于数据量不到15K的数据包,采取UDP的方式进行传输,超过15K的,或者是查询类的操作,自动采取TCP的方式进行传输。同时,TDengine根据配置和数据包,会自动对数据进行压缩/解压缩,数字签名/认证等处理。对于数据节点之间的数据复制,只采用TCP方式进行数据传输。 -**FQDN配置**:一个数据节点有一个或多个FQDN,可以在系统配置文件taos.cfg通过参数“fqdn"进行指定,如果没有指定,系统将自动获取计算机的hostname作为其FQDN。如果节点没有配置FQDN,可以直接将该节点的配置参数fqdn设置为它的IP地址。但不建议使用IP,因为IP地址可变,一旦变化,将让集群无法正常工作。一个数据节点的EP(End Point)由FQDN + Port组成。采用FQDN,需要保证DNS服务正常工作,或者在节点以及应用所在的节点配置好hosts文件。 +**FQDN配置**:一个数据节点有一个或多个FQDN,可以在系统配置文件taos.cfg通过参数“fqdn"进行指定,如果没有指定,系统将自动获取计算机的hostname作为其FQDN。如果节点没有配置FQDN,可以直接将该节点的配置参数fqdn设置为它的IP地址。但不建议使用IP,因为IP地址可变,一旦变化,将让集群无法正常工作。一个数据节点的EP(End Point)由FQDN + Port组成。采用FQDN,需要保证DNS服务正常工作,或者在节点以及应用所在的节点配置好hosts文件。另外,这个参数值的长度需要控制在 96 个字符以内。 -**端口配置:**一个数据节点对外的端口由TDengine的系统配置参数serverPort决定,对集群内部通讯的端口是serverPort+5。集群内数据节点之间的数据复制操作还占有一个TCP端口,是serverPort+10. 为支持多线程高效的处理UDP数据,每个对内和对外的UDP连接,都需要占用5个连续的端口。因此一个数据节点总的端口范围为serverPort到serverPort + 10,总共11个TCP/UDP端口。(另外还可能有 RESTful、Arbitrator 所使用的端口,那样的话就一共是 13 个。)使用时,需要确保防火墙将这些端口打开,以备使用。每个数据节点可以配置不同的serverPort。 +**端口配置:**一个数据节点对外的端口由TDengine的系统配置参数serverPort决定,对集群内部通讯的端口是serverPort+5。集群内数据节点之间的数据复制操作还占有一个TCP端口,是serverPort+10. 
为支持多线程高效的处理UDP数据,每个对内和对外的UDP连接,都需要占用5个连续的端口。因此一个数据节点总的端口范围为serverPort到serverPort + 10,总共11个TCP/UDP端口。(另外还可能有 RESTful、Arbitrator 所使用的端口,那样的话就一共是 13 个。)使用时,需要确保防火墙将这些端口打开,以备使用。每个数据节点可以配置不同的serverPort。(详细的端口情况请参见 [TDengine 2.0 端口说明](https://www.taosdata.com/cn/documentation/faq#port)) **集群对外连接:** TDengine集群可以容纳单个、多个甚至几千个数据节点。应用只需要向集群中任何一个数据节点发起连接即可,连接需要提供的网络参数是一数据节点的End Point(FQDN加配置的端口号)。通过命令行CLI启动应用taos时,可以通过选项-h来指定数据节点的FQDN, -P来指定其配置的端口号,如果端口不配置,将采用TDengine的系统配置参数serverPort。 @@ -323,8 +323,6 @@ Vnode会保持一个数据版本号(Version),对内存数据进行持久化存 采用同步复制,系统的性能会有所下降,而且latency会增加。因为元数据要强一致,mnode之间的数据同步缺省就是采用的同步复制。 -注:vnode之间的同步复制仅仅企业版支持 - ## 缓存与持久化 ### 缓存 @@ -343,7 +341,7 @@ TDengine采用数据驱动的方式让缓存中的数据写入硬盘进行持久 对于采集的数据,一般有保留时长,这个时长由系统配置参数keep决定。超过这个设置天数的数据文件,将被系统自动删除,释放存储空间。 -给定days与keep两个参数,一个vnode总的数据文件数为:keep/days。总的数据文件个数不宜过大,也不宜过小。10到100以内合适。基于这个原则,可以设置合理的days。 目前的版本,参数keep可以修改,但对于参数days,一但设置后,不可修改。 +给定days与keep两个参数,一个典型工作状态的vnode中总的数据文件数为:`向上取整(keep/days)+1`个。总的数据文件个数不宜过大,也不宜过小。10到100以内合适。基于这个原则,可以设置合理的days。 目前的版本,参数keep可以修改,但对于参数days,一但设置后,不可修改。 在每个数据文件里,一张表的数据是一块一块存储的。一张表可以有一到多个数据文件块。在一个文件块里,数据是列式存储的,占用的是一片连续的存储空间,这样大大提高读取速度。文件块的大小由系统参数maxRows(每块最大记录条数)决定,缺省值为4096。这个值不宜过大,也不宜过小。过大,定位具体时间段的数据的搜索时间会变长,影响读取速度;过小,数据块的索引太大,压缩效率偏低,也影响读取速度。 diff --git a/documentation20/cn/06.queries/docs.md b/documentation20/cn/06.queries/docs.md index a161778a72728ca05a75538c8b04ca0277e01bb2..5557134aac23b4f69066c9fb41aaa51972fcbba3 100644 --- a/documentation20/cn/06.queries/docs.md +++ b/documentation20/cn/06.queries/docs.md @@ -12,7 +12,7 @@ TDengine 采用 SQL 作为查询语言。应用程序可以通过 C/C++, Java, G - 时间戳对齐的连接查询(Join Query: 隐式连接)操作 - 多种聚合/计算函数: count, max, min, avg, sum, twa, stddev, leastsquares, top, bottom, first, last, percentile, apercentile, last_row, spread, diff等 -例如:在TAOS Shell中,从表d1001中查询出vlotage > 215的记录,按时间降序排列,仅仅输出2条。 +例如:在TAOS Shell中,从表d1001中查询出voltage > 215的记录,按时间降序排列,仅仅输出2条。 ```mysql taos> select * from d1001 where voltage > 215 order by ts desc limit 2; ts | current | voltage | phase | diff --git a/documentation20/cn/07.advanced-features/docs.md b/documentation20/cn/07.advanced-features/docs.md index 650a2ca96b759bd6b8123dbb64023496b654dcd0..1077f299ee2a2e93589d0246af7633a6886c6756 100644 --- a/documentation20/cn/07.advanced-features/docs.md +++ b/documentation20/cn/07.advanced-features/docs.md @@ -120,7 +120,7 @@ if (async) { } ``` -TDengine中的订阅既可以是同步的,也可以是异步的,上面的代码会根据从命令行获取的参数`async`的值来决定使用哪种方式。这里,同步的意思是用户程序要直接调用`taos_consume`来拉取数据,而异步则由API在内部的另一个线程中调用`taos_consume`,然后把拉取到的数据交给回调函数`subscribe_callback`去处理。 +TDengine中的订阅既可以是同步的,也可以是异步的,上面的代码会根据从命令行获取的参数`async`的值来决定使用哪种方式。这里,同步的意思是用户程序要直接调用`taos_consume`来拉取数据,而异步则由API在内部的另一个线程中调用`taos_consume`,然后把拉取到的数据交给回调函数`subscribe_callback`去处理。(注意,`subscribe_callback` 中不宜做较为耗时的操作,否则有可能导致客户端阻塞等不可控的问题。) 参数`taos`是一个已经建立好的数据库连接,在同步模式下无特殊要求。但在异步模式下,需要注意它不会被其它线程使用,否则可能导致不可预计的错误,因为回调函数在API的内部线程中被调用,而TDengine的部分API不是线程安全的。 diff --git a/documentation20/cn/08.connector/01.java/docs.md b/documentation20/cn/08.connector/01.java/docs.md index 3442a2248cd3743cc93034fb5aa9d13b96079543..511bab8a605ce666d263d609d1599e30c85d78c4 100644 --- a/documentation20/cn/08.connector/01.java/docs.md +++ b/documentation20/cn/08.connector/01.java/docs.md @@ -16,7 +16,6 @@ TDengine 的 JDBC 驱动实现尽可能与关系型数据库驱动保持一致 * TDengine 目前不支持针对单条数据记录的删除操作。 * 目前不支持事务操作。 -* 目前不支持表间的 union 操作。 * 目前不支持嵌套查询(nested query)。 * 对每个 Connection 的实例,至多只能有一个打开的 ResultSet 实例;如果在 ResultSet 还没关闭的情况下执行了新的查询,taos-jdbcdriver 会自动关闭上一个 ResultSet。 @@ -50,6 +49,7 @@ TDengine 的 JDBC 驱动实现尽可能与关系型数据库驱动保持一致 +注意:与 JNI 方式不同,RESTful 接口是无状态的,因此 `USE db_name` 
指令没有效果,RESTful 下所有对表名、超级表名的引用都需要指定数据库名前缀。 ## 如何获取 taos-jdbcdriver @@ -267,7 +267,9 @@ while(resultSet.next()){ > 查询和操作关系型数据库一致,使用下标获取返回字段内容时从 1 开始,建议使用字段名称获取。 ### 处理异常 + 在报错后,通过SQLException可以获取到错误的信息和错误码: + ```java try (Statement statement = connection.createStatement()) { // executeQuery @@ -280,11 +282,90 @@ try (Statement statement = connection.createStatement()) { e.printStackTrace(); } ``` + JDBC连接器可能报错的错误码包括3种:JDBC driver本身的报错(错误码在0x2301到0x2350之间),JNI方法的报错(错误码在0x2351到0x2400之间),TDengine其他功能模块的报错。 具体的错误码请参考: * https://github.com/taosdata/TDengine/blob/develop/src/connector/jdbc/src/main/java/com/taosdata/jdbc/TSDBErrorNumbers.java * https://github.com/taosdata/TDengine/blob/develop/src/inc/taoserror.h +### 通过参数绑定写入数据 + +从 2.1.2.0 版本开始,TDengine 的 **JDBC-JNI** 实现大幅改进了参数绑定方式对数据写入(INSERT)场景的支持。采用这种方式写入数据时,能避免 SQL 语法解析的资源消耗,从而在很多情况下显著提升写入性能。(注意:**JDBC-RESTful** 实现并不提供参数绑定这种使用方式。) + +```java +Statement stmt = conn.createStatement(); +Random r = new Random(); + +// INSERT 语句中,VALUES 部分允许指定具体的数据列;如果采取自动建表,则 TAGS 部分需要设定全部 TAGS 列的参数值: +TSDBPreparedStatement s = (TSDBPreparedStatement) conn.prepareStatement("insert into ? using weather_test tags (?, ?) (ts, c1, c2) values(?, ?, ?)"); + +// 设定数据表名: +s.setTableName("w1"); +// 设定 TAGS 取值: +s.setTagInt(0, r.nextInt(10)); +s.setTagString(1, "Beijing"); + +int numOfRows = 10; + +// VALUES 部分以逐列的方式进行设置: +ArrayList ts = new ArrayList<>(); +for (int i = 0; i < numOfRows; i++){ + ts.add(System.currentTimeMillis() + i); +} +s.setTimestamp(0, ts); + +ArrayList s1 = new ArrayList<>(); +for (int i = 0; i < numOfRows; i++){ + s1.add(r.nextInt(100)); +} +s.setInt(1, s1); + +ArrayList s2 = new ArrayList<>(); +for (int i = 0; i < numOfRows; i++){ + s2.add("test" + r.nextInt(100)); +} +s.setString(2, s2, 10); + +// AddBatch 之后,缓存并未清空。为避免混乱,并不推荐在 ExecuteBatch 之前再次绑定新一批的数据: +s.columnDataAddBatch(); +// 执行绑定数据后的语句: +s.columnDataExecuteBatch(); +// 执行语句后清空缓存。在清空之后,可以复用当前的对象,绑定新的一批数据(可以是新表名、新 TAGS 值、新 VALUES 值): +s.columnDataClearBatch(); +// 执行完毕,释放资源: +s.columnDataCloseBatch(); +``` + +用于设定 TAGS 取值的方法总共有: +```java +public void setTagNull(int index, int type) +public void setTagBoolean(int index, boolean value) +public void setTagInt(int index, int value) +public void setTagByte(int index, byte value) +public void setTagShort(int index, short value) +public void setTagLong(int index, long value) +public void setTagTimestamp(int index, long value) +public void setTagFloat(int index, float value) +public void setTagDouble(int index, double value) +public void setTagString(int index, String value) +public void setTagNString(int index, String value) +``` + +用于设定 VALUES 数据列的取值的方法总共有: +```java +public void setInt(int columnIndex, ArrayList list) throws SQLException +public void setFloat(int columnIndex, ArrayList list) throws SQLException +public void setTimestamp(int columnIndex, ArrayList list) throws SQLException +public void setLong(int columnIndex, ArrayList list) throws SQLException +public void setDouble(int columnIndex, ArrayList list) throws SQLException +public void setBoolean(int columnIndex, ArrayList list) throws SQLException +public void setByte(int columnIndex, ArrayList list) throws SQLException +public void setShort(int columnIndex, ArrayList list) throws SQLException +public void setString(int columnIndex, ArrayList list, int size) throws SQLException +public void setNString(int columnIndex, ArrayList list, int size) throws SQLException +``` +其中 setString 和 setNString 都要求用户在 size 参数里声明表定义中对应列的列宽。 + ### 订阅 #### 创建 @@ -447,12 +528,13 @@ Query OK, 1 row(s) in 
set (0.000141s) -## TAOS-JDBCDriver 版本以及支持的 TDengine 版本和 JDK 版本 +## TAOS-JDBCDriver 版本以及支持的 TDengine 版本和 JDK 版本 | taos-jdbcdriver 版本 | TDengine 版本 | JDK 版本 | | -------------------- | ----------------- | -------- | -| 2.0.22 | 2.0.18.0 及以上 | 1.8.x | -| 2.0.12 - 2.0.21 | 2.0.8.0 - 2.0.17.0 | 1.8.x | +| 2.0.31 | 2.1.3.0 及以上 | 1.8.x | +| 2.0.22 - 2.0.30 | 2.0.18.0 - 2.1.2.x | 1.8.x | +| 2.0.12 - 2.0.21 | 2.0.8.0 - 2.0.17.x | 1.8.x | | 2.0.4 - 2.0.11 | 2.0.0.0 - 2.0.7.x | 1.8.x | | 1.0.3 | 1.6.1.x 及以上 | 1.8.x | | 1.0.2 | 1.6.1.x 及以上 | 1.8.x | @@ -471,7 +553,7 @@ TDengine 目前支持时间戳、数字、字符、布尔类型,与 Java 对 | BIGINT | java.lang.Long | | FLOAT | java.lang.Float | | DOUBLE | java.lang.Double | -| SMALLINT | java.lang.Short | +| SMALLINT | java.lang.Short | | TINYINT | java.lang.Byte | | BOOL | java.lang.Boolean | | BINARY | byte array | diff --git a/documentation20/cn/08.connector/docs.md b/documentation20/cn/08.connector/docs.md index 5a6c26587a025d73711e91263010887ac3e027b4..3a6e884f56addc7d2d4ccacad57ef3baa6844a4b 100644 --- a/documentation20/cn/08.connector/docs.md +++ b/documentation20/cn/08.connector/docs.md @@ -32,7 +32,7 @@ TDengine提供了丰富的应用程序开发接口,其中包括C/C++、Java、 **Linux** -**1. 从涛思官网(https://www.taosdata.com/cn/all-downloads/)下载** +**1. 从[涛思官网](https://www.taosdata.com/cn/all-downloads/)下载** * X64硬件环境:TDengine-client-2.x.x.x-Linux-x64.tar.gz @@ -56,7 +56,7 @@ TDengine提供了丰富的应用程序开发接口,其中包括C/C++、Java、 ​ *taos.tar.gz*:应用驱动安装包 ​ *driver*:TDengine应用驱动driver ​ *connector*: 各种编程语言连接器(go/grafanaplugin/nodejs/python/JDBC) -​ *examples*: 各种编程语言的示例程序(c/C#/go/JDBC/matlab/python/R) +​ *examples*: 各种编程语言的示例程序(c/C#/go/JDBC/MATLAB/python/R) 运行install_client.sh进行安装 @@ -68,7 +68,7 @@ TDengine提供了丰富的应用程序开发接口,其中包括C/C++、Java、 **Windows x64/x86** -**1. 从涛思官网(https://www.taosdata.com/cn/all-downloads/)下载 :** +**1. 从[涛思官网](https://www.taosdata.com/cn/all-downloads/)下载 :** * X64硬件环境:TDengine-client-2.X.X.X-Windows-x64.exe @@ -213,7 +213,7 @@ C/C++的API类似于MySQL的C API。应用程序使用时,需要包含TDengine - `int taos_result_precision(TAOS_RES *res)` - 返回结果集时间戳字段的精度,`0` 代表毫秒,`1` 代表微秒,`2` 代表纳秒。 + 返回结果集时间戳字段的精度,`0` 代表毫秒,`1` 代表微秒。 - `TAOS_ROW taos_fetch_row(TAOS_RES *res)` @@ -259,7 +259,7 @@ typedef struct taosField { 获取最近一次API调用失败的原因,返回值为字符串。 -- `char *taos_errno(TAOS_RES *res)` +- `int taos_errno(TAOS_RES *res)` 获取最近一次API调用失败的原因,返回值为错误代码。 @@ -291,9 +291,27 @@ typedef struct taosField { TDengine的异步API均采用非阻塞调用模式。应用程序可以用多线程同时打开多张表,并可以同时对每张打开的表进行查询或者插入操作。需要指出的是,**客户端应用必须确保对同一张表的操作完全串行化**,即对同一个表的插入或查询操作未完成时(未返回时),不能够执行第二个插入或查询操作。 -### 参数绑定API + +### 参数绑定 API -除了直接调用 `taos_query` 进行查询,TDengine也提供了支持参数绑定的Prepare API,与 MySQL 一样,这些API目前也仅支持用问号`?`来代表待绑定的参数,具体如下: +除了直接调用 `taos_query` 进行查询,TDengine 也提供了支持参数绑定的 Prepare API,与 MySQL 一样,这些 API 目前也仅支持用问号 `?` 来代表待绑定的参数。 + +从 2.1.1.0 和 2.1.2.0 版本开始,TDengine 大幅改进了参数绑定接口对数据写入(INSERT)场景的支持。这样在通过参数绑定接口写入数据时,就避免了 SQL 语法解析的资源消耗,从而在绝大多数情况下显著提升写入性能。此时的典型操作步骤如下: +1. 调用 `taos_stmt_init` 创建参数绑定对象; +2. 调用 `taos_stmt_prepare` 解析 INSERT 语句; +3. 如果 INSERT 语句中预留了表名但没有预留 TAGS,那么调用 `taos_stmt_set_tbname` 来设置表名; +4. 如果 INSERT 语句中既预留了表名又预留了 TAGS(例如 INSERT 语句采取的是自动建表的方式),那么调用 `taos_stmt_set_tbname_tags` 来设置表名和 TAGS 的值; +5. 调用 `taos_stmt_bind_param_batch` 以多列的方式设置 VALUES 的值,或者调用 `taos_stmt_bind_param` 以单行的方式设置 VALUES 的值; +6. 调用 `taos_stmt_add_batch` 把当前绑定的参数加入批处理; +7. 可以重复第 3~6 步,为批处理加入更多的数据行; +8. 调用 `taos_stmt_execute` 执行已经准备好的批处理指令; +9. 
执行完毕,调用 `taos_stmt_close` 释放所有资源。 + +说明:如果 `taos_stmt_execute` 执行成功,假如不需要改变 SQL 语句的话,那么是可以复用 `taos_stmt_prepare` 的解析结果,直接进行第 3~6 步绑定新数据的。但如果执行出错,那么并不建议继续在当前的环境上下文下继续工作,而是建议释放资源,然后从 `taos_stmt_init` 步骤重新开始。 + +除 C/C++ 语言外,TDengine 的 Java 语言 JNI Connector 也提供参数绑定接口支持,具体请另外参见:[参数绑定接口的 Java 用法](https://www.taosdata.com/cn/documentation/connector/java#stmt-java)。 + +接口相关的具体函数如下(也可以参考 [apitest.c](https://github.com/taosdata/TDengine/blob/develop/tests/examples/c/apitest.c) 文件中使用对应函数的方式): - `TAOS_STMT* taos_stmt_init(TAOS *taos)` @@ -301,11 +319,12 @@ TDengine的异步API均采用非阻塞调用模式。应用程序可以用多线 - `int taos_stmt_prepare(TAOS_STMT *stmt, const char *sql, unsigned long length)` - 解析一条sql语句,将解析结果和参数信息绑定到stmt上,如果参数length大于0,将使用此参数作为sql语句的长度,如等于0,将自动判断sql语句的长度。 + 解析一条 SQL 语句,将解析结果和参数信息绑定到 stmt 上,如果参数 length 大于 0,将使用此参数作为 SQL 语句的长度,如等于 0,将自动判断 SQL 语句的长度。 - `int taos_stmt_bind_param(TAOS_STMT *stmt, TAOS_BIND *bind)` - 进行参数绑定,bind指向一个数组,需保证此数组的元素数量和顺序与sql语句中的参数完全一致。TAOS_BIND 的使用方法与 MySQL中的 MYSQL_BIND 一致,具体定义如下: + 不如 `taos_stmt_bind_param_batch` 效率高,但可以支持非 INSERT 类型的 SQL 语句。 + 进行参数绑定,bind 指向一个数组(代表所要绑定的一行数据),需保证此数组中的元素数量和顺序与 SQL 语句中的参数完全一致。TAOS_BIND 的使用方法与 MySQL 中的 MYSQL_BIND 一致,具体定义如下: ```c typedef struct TAOS_BIND { @@ -319,9 +338,35 @@ typedef struct TAOS_BIND { } TAOS_BIND; ``` +- `int taos_stmt_set_tbname(TAOS_STMT* stmt, const char* name)` + + (2.1.1.0 版本新增,仅支持用于替换 INSERT 语句中的参数值) + 当 SQL 语句中的表名使用了 `?` 占位时,可以使用此函数绑定一个具体的表名。 + +- `int taos_stmt_set_tbname_tags(TAOS_STMT* stmt, const char* name, TAOS_BIND* tags)` + + (2.1.2.0 版本新增,仅支持用于替换 INSERT 语句中的参数值) + 当 SQL 语句中的表名和 TAGS 都使用了 `?` 占位时,可以使用此函数绑定具体的表名和具体的 TAGS 取值。最典型的使用场景是使用了自动建表功能的 INSERT 语句(目前版本不支持指定具体的 TAGS 列)。tags 参数中的列数量需要与 SQL 语句中要求的 TAGS 数量完全一致。 + +- `int taos_stmt_bind_param_batch(TAOS_STMT* stmt, TAOS_MULTI_BIND* bind)` + + (2.1.1.0 版本新增,仅支持用于替换 INSERT 语句中的参数值) + 以多列的方式传递待绑定的数据,需要保证这里传递的数据列的顺序、列的数量与 SQL 语句中的 VALUES 参数完全一致。TAOS_MULTI_BIND 的具体定义如下: + +```c +typedef struct TAOS_MULTI_BIND { + int buffer_type; + void * buffer; + uintptr_t buffer_length; + int32_t * length; + char * is_null; + int num; // 列的个数,即 buffer 中的参数个数 +} TAOS_MULTI_BIND; +``` + - `int taos_stmt_add_batch(TAOS_STMT *stmt)` - 将当前绑定的参数加入批处理中,调用此函数后,可以再次调用`taos_stmt_bind_param`绑定新的参数。需要注意,此函数仅支持 insert/import 语句,如果是select等其他SQL语句,将返回错误。 + 将当前绑定的参数加入批处理中,调用此函数后,可以再次调用 `taos_stmt_bind_param` 或 `taos_stmt_bind_param_batch` 绑定新的参数。需要注意,此函数仅支持 INSERT/IMPORT 语句,如果是 SELECT 等其他 SQL 语句,将返回错误。 - `int taos_stmt_execute(TAOS_STMT *stmt)` @@ -329,12 +374,17 @@ typedef struct TAOS_BIND { - `TAOS_RES* taos_stmt_use_result(TAOS_STMT *stmt)` - 获取语句的结果集。结果集的使用方式与非参数化调用时一致,使用完成后,应对此结果集调用 `taos_free_result`以释放资源。 + 获取语句的结果集。结果集的使用方式与非参数化调用时一致,使用完成后,应对此结果集调用 `taos_free_result` 以释放资源。 - `int taos_stmt_close(TAOS_STMT *stmt)` 执行完毕,释放所有资源。 +- `char * taos_stmt_errstr(TAOS_STMT *stmt)` + + (2.1.3.0 版本新增) + 用于在其他 stmt API 返回错误(返回错误码或空指针)时获取错误信息。 + ### 连续查询接口 TDengine提供时间驱动的实时流式计算API。可以每隔一指定的时间段,对一张或多张数据库的表(数据流)进行各种实时聚合计算操作。操作简单,仅有打开、关闭流的API。具体如下: @@ -345,11 +395,11 @@ TDengine提供时间驱动的实时流式计算API。可以每隔一指定的时 * taos:已经建立好的数据库连接 * sql:SQL查询语句(仅能使用查询语句) * fp:用户定义的回调函数指针,每次流式计算完成后,TDengine将查询的结果(TAOS_ROW)、查询状态(TAOS_RES)、用户定义参数(PARAM)传递给回调函数,在回调函数内,用户可以使用taos_num_fields获取结果集列数,taos_fetch_fields获取结果集每列数据的类型。 - * stime:是流式计算开始的时间,如果是0,表示从现在开始,如果不为零,表示从指定的时间开始计算(UTC时间从1970/1/1算起的毫秒数) + * stime:是流式计算开始的时间。如果是“64位整数最小值”,表示从现在开始;如果不为“64位整数最小值”,表示从指定的时间开始计算(UTC时间从1970/1/1算起的毫秒数)。 * param:是应用提供的用于回调的一个参数,回调时,提供给应用 * callback: 第二个回调函数,会在连续查询自动停止时被调用。 - 返回值为NULL,表示创建成功,返回值不为空,表示成功。 + 返回值为NULL,表示创建失败;返回值不为空,表示成功。 - `void 
taos_close_stream (TAOS_STREAM *tstr)` @@ -377,11 +427,15 @@ TDengine提供时间驱动的实时流式计算API。可以每隔一指定的时 * res:查询结果集,注意结果集中可能没有记录 * param:调用 `taos_subscribe`时客户程序提供的附加参数 * code:错误码 + + **注意**:在这个回调函数里不可以做耗时过长的处理,尤其是对于返回的结果集中数据较多的情况,否则有可能导致客户端阻塞等异常状态。如果必须进行复杂计算,则建议在另外的线程中进行处理。 * `TAOS_RES *taos_consume(TAOS_SUB *tsub)` 同步模式下,该函数用来获取订阅的结果。 用户应用程序将其置于一个循环之中。 如两次调用`taos_consume`的间隔小于订阅的轮询周期,API将会阻塞,直到时间间隔超过此周期。 如果数据库有新记录到达,该API将返回该最新的记录,否则返回一个没有记录的空结果集。 如果返回值为 `NULL`,说明系统出错。 异步模式下,用户程序不应调用此API。 + **注意**:在调用 `taos_consume()` 之后,用户应用应确保尽快调用 `taos_fetch_row()` 或 `taos_fetch_block()` 来处理订阅结果,否则服务端会持续缓存查询结果数据等待客户端读取,极端情况下会导致服务端内存消耗殆尽,影响服务稳定性。 + * `void taos_unsubscribe(TAOS_SUB *tsub, int keepProgress)` 取消订阅。 如参数 `keepProgress` 不为0,API会保留订阅的进度信息,后续调用 `taos_subscribe` 时可以基于此进度继续;否则将删除进度信息,后续只能重新开始读取数据。 @@ -503,6 +557,13 @@ c1.close() conn.close() ``` +#### 关于纳秒 (nanosecond) 在 Python 连接器中的说明 + +由于目前 Python 对 nanosecond 支持的不完善(参见链接 1. 2. ),目前的实现方式是在 nanosecond 精度时返回整数,而不是 ms 和 us 返回的 datetime 类型,应用开发者需要自行处理,建议使用 pandas 的 to_datetime()。未来如果 Python 正式完整支持了纳秒,涛思数据可能会修改相关接口。 + +1. https://stackoverflow.com/questions/10611328/parsing-datetime-strings-containing-nanoseconds +2. https://www.python.org/dev/peps/pep-0564/ + #### 帮助信息 用户可通过python的帮助信息直接查看模块的使用信息,或者参考tests/examples/python中的示例程序。以下为部分常用类和方法: @@ -515,7 +576,7 @@ conn.close() - _TDengineCursor_ 类 参考python中help(taos.TDengineCursor)。 - 这个类对应客户端进行的写入、查询操作。在客户端多线程的场景下,这个游标实例必须保持线程独享,不能夸线程共享使用,否则会导致返回结果出现错误。 + 这个类对应客户端进行的写入、查询操作。在客户端多线程的场景下,这个游标实例必须保持线程独享,不能跨线程共享使用,否则会导致返回结果出现错误。 - _connect_ 方法 @@ -534,7 +595,9 @@ conn.close() ## RESTful Connector -为支持各种不同类型平台的开发,TDengine提供符合REST设计标准的API,即RESTful API。为最大程度降低学习成本,不同于其他数据库RESTful API的设计方法,TDengine直接通过HTTP POST 请求BODY中包含的SQL语句来操作数据库,仅需要一个URL。RESTful连接器的使用参见[视频教程](https://www.taosdata.com/blog/2020/11/11/1965.html)。 +为支持各种不同类型平台的开发,TDengine 提供符合 REST 设计标准的 API,即 RESTful API。为最大程度降低学习成本,不同于其他数据库 RESTful API 的设计方法,TDengine 直接通过 HTTP POST 请求 BODY 中包含的 SQL 语句来操作数据库,仅需要一个 URL。RESTful 连接器的使用参见[视频教程](https://www.taosdata.com/blog/2020/11/11/1965.html)。 + +注意:与标准连接器的一个区别是,RESTful 接口是无状态的,因此 `USE db_name` 指令没有效果,所有对表名、超级表名的引用都需要指定数据库名前缀。 ### HTTP请求格式 @@ -738,7 +801,7 @@ HTTP请求URL采用`sqlutc`时,返回结果集的时间戳将采用UTC时间 下面仅列出一些与RESTful接口有关的配置参数,其他系统参数请看配置文件里的说明。注意:配置修改后,需要重启taosd服务才能生效 -- httpPort: 对外提供RESTful服务的端口号,默认绑定到6041 +- 对外提供RESTful服务的端口号,默认绑定到 6041(实际取值是 serverPort + 11,因此可以通过修改 serverPort 参数的设置来修改) - httpMaxThreads: 启动的线程数量,默认为2(2.0.17版本开始,默认值改为CPU核数的一半向下取整) - restfulRowLimit: 返回结果集(JSON格式)的最大条数,默认值为10240 - httpEnableCompress: 是否支持压缩,默认不支持,目前TDengine仅支持gzip压缩格式 @@ -752,7 +815,7 @@ C#连接器支持的系统有:Linux 64/Windows x64/Windows x86 * 应用驱动安装请参考[安装连接器驱动步骤](https://www.taosdata.com/cn/documentation/connector#driver)。 * .NET接口文件TDengineDrivercs.cs和参考程序示例TDengineTest.cs均位于Windows客户端install_directory/examples/C#目录下。 -* 在Windows系统上,C#应用程序可以使用TDengine的原生C接口来执行所有数据库操作,后续版本将提供ORM(dapper)框架驱动。 +* 在Windows系统上,C#应用程序可以使用TDengine的原生C接口来执行所有数据库操作,后续版本将提供ORM(Dapper)框架驱动。 ### 安装验证 @@ -844,11 +907,15 @@ go env -w GOPROXY=https://goproxy.io,direct sql.Open内置的方法,Close closes the statement. 
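下面给出一个通过 Go 的 database/sql 标准接口连接 TDengine 并完成写入、查询的最小示例(仅为示意性的写法草稿:假设本机已启动 taosd 并使用默认的 6030 端口和 root/taosdata 缺省账号;示例中的 DSN 格式、驱动导入路径请以所安装 driver-go 版本的文档为准):

```go
package main

import (
	"database/sql"
	"fmt"
	"time"

	_ "github.com/taosdata/driver-go/taosSql" // 匿名导入以注册 taosSql 驱动
)

func main() {
	// DSN 格式(假设):用户名:密码@/tcp(主机:端口)/数据库名
	db, err := sql.Open("taosSql", "root:taosdata@/tcp(127.0.0.1:6030)/")
	if err != nil {
		panic(err)
	}
	defer db.Close()

	// 建库建表,并写入一条记录
	if _, err = db.Exec("CREATE DATABASE IF NOT EXISTS demo"); err != nil {
		panic(err)
	}
	if _, err = db.Exec("CREATE TABLE IF NOT EXISTS demo.t (ts TIMESTAMP, speed INT)"); err != nil {
		panic(err)
	}
	if _, err = db.Exec("INSERT INTO demo.t VALUES (NOW, 10)"); err != nil {
		panic(err)
	}

	// 查询并逐行读取结果
	rows, err := db.Query("SELECT ts, speed FROM demo.t")
	if err != nil {
		panic(err)
	}
	defer rows.Close()
	for rows.Next() {
		var (
			ts    time.Time
			speed int
		)
		if err = rows.Scan(&ts, &speed); err != nil {
			panic(err)
		}
		fmt.Println(ts, speed)
	}
	if err = rows.Err(); err != nil {
		panic(err)
	}
}
```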
+### 其他代码示例 + +[Consume Messages from Kafka](https://github.com/taosdata/go-demo-kafka) 是一个通过 Go 语言实现消费 Kafka 队列写入 TDengine 的示例程序,也可以作为通过 Go 连接 TDengine 的写法参考。 + ## Node.js Connector Node.js连接器支持的系统有: -| **CPU类型** | x64(64bit) | | | aarch64 | aarch32 | +|**CPU类型** | x64(64bit) | | | aarch64 | aarch32 | | ------------ | ------------ | -------- | -------- | -------- | -------- | | **OS类型** | Linux | Win64 | Win32 | Linux | Linux | | **支持与否** | **支持** | **支持** | **支持** | **支持** | **支持** | diff --git a/documentation20/cn/09.connections/docs.md b/documentation20/cn/09.connections/docs.md index 79380f3bbd9680120f63f89a0bfbe6f31f5c7a74..b47f297ae0a68c91e5d38aad000acdb14591283d 100644 --- a/documentation20/cn/09.connections/docs.md +++ b/documentation20/cn/09.connections/docs.md @@ -16,7 +16,7 @@ TDengine的Grafana插件在安装包的/usr/local/taos/connector/grafanaplugin 以CentOS 7.2操作系统为例,将grafanaplugin目录拷贝到/var/lib/grafana/plugins目录下,重新启动grafana即可。 ```bash -sudo cp -rf /usr/local/taos/connector/grafanaplugin /var/lib/grafana/tdengine +sudo cp -rf /usr/local/taos/connector/grafanaplugin /var/lib/grafana/plugins/tdengine ``` ### 使用 Grafana @@ -75,50 +75,45 @@ sudo cp -rf /usr/local/taos/connector/grafanaplugin /var/lib/grafana/tdengine ![img](page://images/connections/import_dashboard2.jpg) -## Matlab +## MATLAB -MatLab可以通过安装包内提供的JDBC Driver直接连接到TDengine获取数据到本地工作空间。 +MATLAB 可以通过安装包内提供的 JDBC Driver 直接连接到 TDengine 获取数据到本地工作空间。 -### MatLab的JDBC接口适配 +### MATLAB 的 JDBC 接口适配 -MatLab的适配有下面几个步骤,下面以Windows10上适配MatLab2017a为例: +MATLAB 的适配有下面几个步骤,下面以 Windows 10 上适配 MATLAB2021a 为例: -- 将TDengine安装包内的驱动程序JDBCDriver-1.0.0-dist.jar拷贝到${matlab_root}\MATLAB\R2017a\java\jar\toolbox -- 将TDengine安装包内的taos.lib文件拷贝至${matlab_ root _dir}\MATLAB\R2017a\lib\win64 -- 将新添加的驱动jar包加入MatLab的classpath。在${matlab_ root _dir}\MATLAB\R2017a\toolbox\local\classpath.txt文件中添加下面一行 -​ +- 将 TDengine 客户端安装路径下的 `\TDengine\connector\jdbc的驱动程序taos-jdbcdriver-2.0.25-dist.jar` 拷贝到 `${matlab_root}\MATLAB\R2021a\java\jar\toolbox`。 +- 将 TDengine 安装包内的 `taos.lib` 文件拷贝至 `${matlab_root_dir}\MATLAB\R2021\lib\win64`。 +- 将新添加的驱动 jar 包加入 MATLAB 的 classpath。在 `${matlab_root_dir}\MATLAB\R2021a\toolbox\local\classpath.txt` 文件中添加下面一行: ``` -$matlabroot/java/jar/toolbox/JDBCDriver-1.0.0-dist.jar +$matlabroot/java/jar/toolbox/taos-jdbcdriver-2.0.25-dist.jar ``` -- 在${user_home}\AppData\Roaming\MathWorks\MATLAB\R2017a\下添加一个文件javalibrarypath.txt, 并在该文件中添加taos.dll的路径,比如您的taos.dll是在安装时拷贝到了C:\Windows\System32下,那么就应该在javalibrarypath.txt中添加如下一行: -​ +- 在 `${user_home}\AppData\Roaming\MathWorks\MATLAB\R2021a\` 下添加一个文件 `javalibrarypath.txt`,并在该文件中添加 taos.dll 的路径,比如您的 taos.dll 是在安装时拷贝到了 `C:\Windows\System32` 下,那么就应该在 `javalibrarypath.txt` 中添加如下一行: ``` C:\Windows\System32 ``` -### 在MatLab中连接TDengine获取数据 +### 在 MATLAB 中连接 TDengine 获取数据 -在成功进行了上述配置后,打开MatLab。 +在成功进行了上述配置后,打开 MATLAB。 - 创建一个连接: - ```matlab -conn = database(‘db’, ‘root’, ‘taosdata’, ‘com.taosdata.jdbc.TSDBDriver’, ‘jdbc:TSDB://127.0.0.1:0/’) +conn = database(‘test’, ‘root’, ‘taosdata’, ‘com.taosdata.jdbc.TSDBDriver’, ‘jdbc:TSDB://192.168.1.94:6030/’) ``` - 执行一次查询: - ```matlab sql0 = [‘select * from tb’] data = select(conn, sql0); ``` - 插入一条记录: - ```matlab sql1 = [‘insert into tb values (now, 1)’] exec(conn, sql1) ``` -更多例子细节请参考安装包内examples\Matlab\TDengineDemo.m文件。 +更多例子细节请参考安装包内 `examples\Matlab\TDengineDemo.m` 文件。 ## R diff --git a/documentation20/cn/10.cluster/docs.md b/documentation20/cn/10.cluster/docs.md index a430ce8277b49a3dbf7062fc078a47a3d848f8d8..db20ca4edb6513f70ebbf17969be1c20dccb6163 100644 --- 
a/documentation20/cn/10.cluster/docs.md +++ b/documentation20/cn/10.cluster/docs.md @@ -55,12 +55,11 @@ arbitrator ha.taosdata.com:6042 | 4 | statusInterval | dnode向mnode报告状态时长 | | 5 | arbitrator | 系统中裁决器的end point | | 6 | timezone | 时区 | -| 7 | locale | 系统区位信息及编码格式 | -| 8 | charset | 字符集编码 | -| 9 | balance | 是否启动负载均衡 | -| 10 | maxTablesPerVnode | 每个vnode中能够创建的最大表个数 | -| 11 | maxVgroupsPerDb | 每个DB中能够使用的最大vgroup个数 | +| 7 | balance | 是否启动负载均衡 | +| 8 | maxTablesPerVnode | 每个vnode中能够创建的最大表个数 | +| 9 | maxVgroupsPerDb | 每个DB中能够使用的最大vgroup个数 | +备注:在 2.0.19.0 及更早的版本中,除以上 9 项参数外,dnode 加入集群时,还会要求 locale 和 charset 参数的取值也一致。 ## 启动第一个数据节点 @@ -86,7 +85,7 @@ taos> 将后续的数据节点添加到现有集群,具体有以下几步: -1. 按照[《立即开始》](https://www.taosdata.com/cn/documentation/getting-started/)一章的方法在每个物理节点启动taosd; +1. 按照[《立即开始》](https://www.taosdata.com/cn/documentation/getting-started/)一章的方法在每个物理节点启动taosd;(注意:每个物理节点都需要在 taos.cfg 文件中将 firstEP 参数配置为新集群首个节点的 End Point——在本例中是 h1.taos.com:6030) 2. 在第一个数据节点,使用CLI程序taos, 登录进TDengine系统, 执行命令: diff --git a/documentation20/cn/11.administrator/docs.md b/documentation20/cn/11.administrator/docs.md index cc8689786d1725efdff82610190d4a6b1e34f906..b37916d790c5353db3a500173f4ef727a30c766d 100644 --- a/documentation20/cn/11.administrator/docs.md +++ b/documentation20/cn/11.administrator/docs.md @@ -99,9 +99,8 @@ taosd -C 下面仅仅列出一些重要的配置参数,更多的参数请看配置文件里的说明。各个参数的详细介绍及作用请看前述章节,而且这些参数的缺省配置都是工作的,一般无需设置。**注意:配置修改后,需要重启*taosd*服务才能生效。** - firstEp: taosd启动时,主动连接的集群中首个dnode的end point, 默认值为localhost:6030。 -- fqdn:数据节点的FQDN,缺省为操作系统配置的第一个hostname。如果习惯IP地址访问,可设置为该节点的IP地址。 -- serverPort:taosd启动后,对外服务的端口号,默认值为6030。 -- httpPort: RESTful服务使用的端口号,所有的HTTP请求(TCP)都需要向该接口发起查询/写入请求, 默认值为6041。 +- fqdn:数据节点的FQDN,缺省为操作系统配置的第一个hostname。如果习惯IP地址访问,可设置为该节点的IP地址。这个参数值的长度需要控制在 96 个字符以内。 +- serverPort:taosd启动后,对外服务的端口号,默认值为6030。(RESTful服务使用的端口号是在此基础上+11,即默认值为6041。) - dataDir: 数据文件目录,所有的数据文件都将写入该目录。默认值:/var/lib/taos。 - logDir:日志文件目录,客户端和服务器的运行日志文件将写入该目录。默认值:/var/log/taos。 - arbitrator:系统中裁决器的end point, 缺省值为空。 @@ -115,22 +114,24 @@ taosd -C - queryBufferSize: 为所有并发查询占用保留的内存大小。计算规则可以根据实际应用可能的最大并发数和表的数字相乘,再乘 170 。单位为 MB(2.0.15 以前的版本中,此参数的单位是字节)。 - ratioOfQueryCores: 设置查询线程的最大数量。最小值0 表示只有1个查询线程;最大值2表示最大建立2倍CPU核数的查询线程。默认为1,表示最大和CPU核数相等的查询线程。该值可以为小数,即0.5表示最大建立CPU核数一半的查询线程。 -**注意:**对于端口,TDengine会使用从serverPort起13个连续的TCP和UDP端口号,请务必在防火墙打开。因此如果是缺省配置,需要打开从6030到6042共13个端口,而且必须TCP和UDP都打开。 +**注意:**对于端口,TDengine会使用从serverPort起13个连续的TCP和UDP端口号,请务必在防火墙打开。因此如果是缺省配置,需要打开从6030到6042共13个端口,而且必须TCP和UDP都打开。(详细的端口情况请参见 [TDengine 2.0 端口说明](https://www.taosdata.com/cn/documentation/faq#port)) -不同应用场景的数据往往具有不同的数据特征,比如保留天数、副本数、采集频次、记录大小、采集点的数量、压缩等都可完全不同。为获得在存储上的最高效率,TDengine提供如下存储相关的系统配置参数: +不同应用场景的数据往往具有不同的数据特征,比如保留天数、副本数、采集频次、记录大小、采集点的数量、压缩等都可完全不同。为获得在存储上的最高效率,TDengine提供如下存储相关的系统配置参数(既可以作为 create database 指令的参数,也可以写在 taos.cfg 配置文件中用来设定创建新数据库时所采用的默认值): -- days:一个数据文件存储数据的时间跨度,单位为天,默认值:10。 -- keep:数据库中数据保留的天数,单位为天,默认值:3650。(可通过 alter database 修改) -- minRows:文件块中记录的最小条数,单位为条,默认值:100。 -- maxRows:文件块中记录的最大条数,单位为条,默认值:4096。 -- comp:文件压缩标志位,0:关闭;1:一阶段压缩;2:两阶段压缩。默认值:2。(可通过 alter database 修改) -- walLevel:WAL级别。1:写wal,但不执行fsync;2:写wal, 而且执行fsync。默认值:1。 +- days:一个数据文件存储数据的时间跨度。单位为天,默认值:10。 +- keep:数据库中数据保留的天数。单位为天,默认值:3650。(可通过 alter database 修改) +- minRows:文件块中记录的最小条数。单位为条,默认值:100。 +- maxRows:文件块中记录的最大条数。单位为条,默认值:4096。 +- comp:文件压缩标志位。0:关闭;1:一阶段压缩;2:两阶段压缩。默认值:2。(可通过 alter database 修改) +- wal:WAL级别。1:写wal,但不执行fsync;2:写wal, 而且执行fsync。默认值:1。(在 taos.cfg 中参数名需要写作 walLevel) - fsync:当wal设置为2时,执行fsync的周期。设置为0,表示每次写入,立即执行fsync。单位为毫秒,默认值:3000。 -- cache:内存块的大小,单位为兆字节(MB),默认值:16。 +- 
cache:内存块的大小。单位为兆字节(MB),默认值:16。 - blocks:每个VNODE(TSDB)中有多少cache大小的内存块。因此一个VNODE的用的内存大小粗略为(cache * blocks)。单位为块,默认值:4。(可通过 alter database 修改) -- replica:副本个数,取值范围:1-3。单位为个,默认值:1。(可通过 alter database 修改) -- precision:时间戳精度标识,ms表示毫秒,us表示微秒。默认值:ms。 -- cacheLast:是否在内存中缓存子表 last_row,0:关闭;1:开启。默认值:0。(可通过 alter database 修改)(从 2.0.11 版本开始支持此参数) +- replica:副本个数。取值范围:1-3,单位为个,默认值:1。(可通过 alter database 修改) +- quorum:多副本环境下指令执行的确认数要求。取值范围:1、2,单位为个,默认值:1。(可通过 alter database 修改) +- precision:时间戳精度标识。ms表示毫秒,us表示微秒,默认值:ms。(2.1.2.0 版本之前、2.0.20.7 版本之前在 taos.cfg 文件中不支持此参数。) +- cacheLast:是否在内存中缓存子表的最近数据。0:关闭;1:缓存子表最近一行数据;2:缓存子表每一列的最近的非NULL值;3:同时打开缓存最近行和列功能。默认值:0。(可通过 alter database 修改)(从 2.1.2.0 版本开始此参数支持 0~3 的取值范围,在此之前取值只能是 [0, 1];而 2.0.11.0 之前的版本在 SQL 指令中不支持此参数。)(2.1.2.0 版本之前、2.0.20.7 版本之前在 taos.cfg 文件中不支持此参数。) +- update:是否允许更新。0:不允许;1:允许。默认值:0。 对于一个应用场景,可能有多种数据特征的数据并存,最佳的设计是将具有相同数据特征的表放在一个库里,这样一个应用有多个库,而每个库可以配置不同的存储参数,从而保证系统有最优的性能。TDengine允许应用在创建库时指定上述存储参数,如果指定,该参数就将覆盖对应的系统配置参数。举例,有下述SQL: @@ -142,15 +143,18 @@ taosd -C TDengine集群中加入一个新的dnode时,涉及集群相关的一些参数必须与已有集群的配置相同,否则不能成功加入到集群中。会进行校验的参数如下: -- numOfMnodes:系统中管理节点个数。默认值:3。 -- balance:是否启动负载均衡。0:否,1:是。默认值:1。 +- numOfMnodes:系统中管理节点个数。默认值:3。(2.0 版本从 2.0.20.11 开始、2.1 及以上版本从 2.1.6.0 开始,numOfMnodes 默认值改为 1。) - mnodeEqualVnodeNum: 一个mnode等同于vnode消耗的个数。默认值:4。 - offlineThreshold: dnode离线阈值,超过该时间将导致该dnode从集群中删除。单位为秒,默认值:86400*10(即10天)。 - statusInterval: dnode向mnode报告状态时长。单位为秒,默认值:1。 - maxTablesPerVnode: 每个vnode中能够创建的最大表个数。默认值:1000000。 - maxVgroupsPerDb: 每个数据库中能够使用的最大vgroup个数。 - arbitrator: 系统中裁决器的end point,缺省为空。 -- timezone、locale、charset 的配置见客户端配置。 +- timezone、locale、charset 的配置见客户端配置。(2.0.20.0 及以上的版本里,集群中加入新节点已不要求 locale 和 charset 参数取值一致) +- balance:是否启用负载均衡。0:否,1:是。默认值:1。 +- flowctrl:是否启用非阻塞流控。0:否,1:是。默认值:1。 +- slaveQuery:是否启用 slave vnode 参与查询。0:否,1:是。默认值:1。 +- adjustMaster:是否启用 vnode master 负载均衡。0:否,1:是。默认值:1。 为方便调试,可通过SQL语句临时调整每个dnode的日志配置,系统重启后会失效: @@ -414,6 +418,19 @@ TDengine启动后,会自动创建一个监测数据库log,并自动将服务 这些监测信息的采集缺省是打开的,但可以修改配置文件里的选项enableMonitor将其关闭或打开。 + +## 性能优化 + +因数据行 [update](https://www.taosdata.com/cn/documentation/faq#update)、表删除、数据过期等原因,TDengine 的磁盘存储文件有可能出现数据碎片,影响查询操作的性能表现。从 2.1.3.0 版本开始,新增 SQL 指令 COMPACT 来启动碎片重整过程: + +```mysql +COMPACT VNODES IN (vg_id1, vg_id2, ...) 
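+-- 示意:假设从 SHOW VGROUPS; 的输出中得到待重整的 vgId 为 3 和 4(仅为示例取值),则可执行:
+-- COMPACT VNODES IN (3, 4);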
+``` + +COMPACT 命令对指定的一个或多个 VGroup 启动碎片重整,系统会通过任务队列尽快安排重整操作的具体执行。COMPACT 指令所需的 VGroup id,可以通过 `SHOW VGROUPS;` 指令的输出结果获取;而且在 `SHOW VGROUPS;` 中会有一个 compacting 列,值为 1 时表示对应的 VGroup 正在进行碎片重整,为 0 时则表示并没有处于重整状态。 + +需要注意的是,碎片重整操作会大幅消耗磁盘 I/O。因此在重整进行期间,有可能会影响节点的写入和查询性能,甚至在极端情况下导致短时间的阻写。 + ## 文件目录结构 安装TDengine后,默认会在操作系统中生成下列目录或文件: @@ -445,7 +462,7 @@ TDengine的所有可执行文件默认存放在 _/usr/local/taos/bin_ 目录下 - 数据库名:不能包含“.”以及特殊字符,不能超过 32 个字符 - 表名:不能包含“.”以及特殊字符,与所属数据库名一起,不能超过 192 个字符 - 表的列名:不能包含特殊字符,不能超过 64 个字符 -- 数据库名、表名、列名,都不能以数字开头 +- 数据库名、表名、列名,都不能以数字开头,合法的可用字符集是“英文字符、数字和下划线” - 表的列数:不能超过 1024 列 - 记录的最大长度:包括时间戳 8 byte,不能超过 16KB(每个 BINARY/NCHAR 类型的列还会额外占用 2 个 byte 的存储位置) - 单条 SQL 语句默认最大字符串长度:65480 byte @@ -461,43 +478,44 @@ TDengine的所有可执行文件默认存放在 _/usr/local/taos/bin_ 目录下 目前 TDengine 有将近 200 个内部保留关键字,这些关键字无论大小写均不可以用作库名、表名、STable 名、数据列名及标签列名等。这些关键字列表如下: -| 关键字列表 | | | | | -| ---------- | ----------- | ------------ | ---------- | --------- | -| ABLOCKS | CONNECTIONS | GT | MNODES | SLIDING | -| ABORT | COPY | ID | MODULES | SLIMIT | -| ACCOUNT | COUNT | IF | NCHAR | SMALLINT | -| ACCOUNTS | CREATE | IGNORE | NE | SPREAD | -| ADD | CTIME | IMMEDIATE | NONE | STABLE | -| AFTER | DATABASE | IMPORT | NOT | STABLES | -| ALL | DATABASES | IN | NOTNULL | STAR | -| ALTER | DAYS | INITIALLY | NOW | STATEMENT | -| AND | DEFERRED | INSERT | OF | STDDEV | -| AS | DELIMITERS | INSTEAD | OFFSET | STREAM | -| ASC | DESC | INTEGER | OR | STREAMS | -| ATTACH | DESCRIBE | INTERVAL | ORDER | STRING | -| AVG | DETACH | INTO | PASS | SUM | -| BEFORE | DIFF | IP | PERCENTILE | TABLE | -| BEGIN | DISTINCT | IS | PLUS | TABLES | -| BETWEEN | DIVIDE | ISNULL | PRAGMA | TAG | -| BIGINT | DNODE | JOIN | PREV | TAGS | -| BINARY | DNODES | KEEP | PRIVILEGE | TBLOCKS | -| BITAND | DOT | KEY | QUERIES | TBNAME | -| BITNOT | DOUBLE | KILL | QUERY | TIMES | -| BITOR | DROP | LAST | RAISE | TIMESTAMP | -| BOOL | EACH | LE | REM | TINYINT | -| BOTTOM | END | LEASTSQUARES | REPLACE | TOP | -| BY | EQ | LIKE | REPLICA | TRIGGER | -| CACHE | EXISTS | LIMIT | RESET | UMINUS | -| CASCADE | EXPLAIN | LINEAR | RESTRICT | UPLUS | -| CHANGE | FAIL | LOCAL | ROW | USE | -| CLOG | FILL | LP | ROWS | USER | -| CLUSTER | FIRST | LSHIFT | RP | USERS | -| COLON | FLOAT | LT | RSHIFT | USING | -| COLUMN | FOR | MATCH | SCORES | VALUES | -| COMMA | FROM | MAX | SELECT | VARIABLE | -| COMP | GE | METRIC | SEMI | VGROUPS | -| CONCAT | GLOB | METRICS | SET | VIEW | -| CONFIGS | GRANTS | MIN | SHOW | WAVG | -| CONFLICT | GROUP | MINUS | SLASH | WHERE | -| CONNECTION | | | | | +| 关键字列表 | | | | | +| ------------ | ------------ | ------------ | ------------ | ------------ | +| ABORT | CREATE | IGNORE | NULL | STAR | +| ACCOUNT | CTIME | IMMEDIATE | OF | STATE | +| ACCOUNTS | DATABASE | IMPORT | OFFSET | STATEMENT | +| ADD | DATABASES | IN | OR | STATE_WINDOW | +| AFTER | DAYS | INITIALLY | ORDER | STORAGE | +| ALL | DBS | INSERT | PARTITIONS | STREAM | +| ALTER | DEFERRED | INSTEAD | PASS | STREAMS | +| AND | DELIMITERS | INT | PLUS | STRING | +| AS | DESC | INTEGER | PPS | SYNCDB | +| ASC | DESCRIBE | INTERVAL | PRECISION | TABLE | +| ATTACH | DETACH | INTO | PREV | TABLES | +| BEFORE | DISTINCT | IS | PRIVILEGE | TAG | +| BEGIN | DIVIDE | ISNULL | QTIME | TAGS | +| BETWEEN | DNODE | JOIN | QUERIES | TBNAME | +| BIGINT | DNODES | KEEP | QUERY | TIMES | +| BINARY | DOT | KEY | QUORUM | TIMESTAMP | +| BITAND | DOUBLE | KILL | RAISE | TINYINT | +| BITNOT | DROP | LE | REM | TOPIC | +| BITOR | EACH | LIKE | REPLACE | TOPICS | +| BLOCKS | END | LIMIT | REPLICA | TRIGGER | +| 
BOOL | EQ | LINEAR | RESET | TSERIES | +| BY | EXISTS | LOCAL | RESTRICT | UMINUS | +| CACHE | EXPLAIN | LP | ROW | UNION | +| CACHELAST | FAIL | LSHIFT | RP | UNSIGNED | +| CASCADE | FILE | LT | RSHIFT | UPDATE | +| CHANGE | FILL | MATCH | SCORES | UPLUS | +| CLUSTER | FLOAT | MAXROWS | SELECT | USE | +| COLON | FOR | MINROWS | SEMI | USER | +| COLUMN | FROM | MINUS | SESSION | USERS | +| COMMA | FSYNC | MNODES | SET | USING | +| COMP | GE | MODIFY | SHOW | VALUES | +| COMPACT | GLOB | MODULES | SLASH | VARIABLE | +| CONCAT | GRANTS | NCHAR | SLIDING | VARIABLES | +| CONFLICT | GROUP | NE | SLIMIT | VGROUPS | +| CONNECTION | GT | NONE | SMALLINT | VIEW | +| CONNECTIONS | HAVING | NOT | SOFFSET | VNODES | +| CONNS | ID | NOTNULL | STABLE | WAL | +| COPY | IF | NOW | STABLES | WHERE | diff --git a/documentation20/cn/12.taos-sql/01.error-code/docs.md b/documentation20/cn/12.taos-sql/01.error-code/docs.md index 95975dba5aeeeee9f42c3bf0b34f48095ea83fa3..867aa18715f87a1dfc9ea36203d32382bb726e30 100644 --- a/documentation20/cn/12.taos-sql/01.error-code/docs.md +++ b/documentation20/cn/12.taos-sql/01.error-code/docs.md @@ -1,172 +1,172 @@ # TDengine 2.0 错误码以及对应的十进制码 -| 状态码 | 模 | 错误码(十六进制) | 错误描述 | 错误码(十进制) | -|-----------------------| :---: | :---------: | :------------------------ | ---------------- | -|TSDB_CODE_RPC_ACTION_IN_PROGRESS| 0 | 0x0001| "Action in progress"| -2147483647| -|TSDB_CODE_RPC_AUTH_REQUIRED| 0 | 0x0002 | "Authentication required"| -2147483646| -|TSDB_CODE_RPC_AUTH_FAILURE| 0| 0x0003 | "Authentication failure"| -2147483645| -|TSDB_CODE_RPC_REDIRECT |0 | 0x0004| "Redirect"| -2147483644| -|TSDB_CODE_RPC_NOT_READY| 0 | 0x0005 | "System not ready"| -2147483643| -|TSDB_CODE_RPC_ALREADY_PROCESSED| 0 | 0x0006 |"Message already processed"| -2147483642| -|TSDB_CODE_RPC_LAST_SESSION_NOT_FINISHED| 0 |0x0007| "Last session not finished"| -2147483641| -|TSDB_CODE_RPC_MISMATCHED_LINK_ID| 0| 0x0008 | "Mismatched meter id"| -2147483640| -|TSDB_CODE_RPC_TOO_SLOW| 0 | 0x0009 | "Processing of request timed out"| -2147483639| -|TSDB_CODE_RPC_MAX_SESSIONS| 0 | 0x000A | "Number of sessions reached limit"| -2147483638| -|TSDB_CODE_RPC_NETWORK_UNAVAIL| 0 |0x000B | "Unable to establish connection" |-2147483637| -|TSDB_CODE_RPC_APP_ERROR| 0| 0x000C | "Unexpected generic error in RPC"| -2147483636| -|TSDB_CODE_RPC_UNEXPECTED_RESPONSE| 0 |0x000D | "Unexpected response"| -2147483635| -|TSDB_CODE_RPC_INVALID_VALUE| 0 | 0x000E | "Invalid value"| -2147483634| -|TSDB_CODE_RPC_INVALID_TRAN_ID| 0 | 0x000F | "Invalid transaction id"| -2147483633| -|TSDB_CODE_RPC_INVALID_SESSION_ID| 0| 0x0010 | "Invalid session id"| -2147483632| -|TSDB_CODE_RPC_INVALID_MSG_TYPE| 0| 0x0011| "Invalid message type"| -2147483631| -|TSDB_CODE_RPC_INVALID_RESPONSE_TYPE| 0 | 0x0012| "Invalid response type"| -2147483630| -|TSDB_CODE_RPC_INVALID_TIME_STAMP| 0| 0x0013| "Invalid timestamp"| -2147483629| -|TSDB_CODE_COM_OPS_NOT_SUPPORT| 0 | 0x0100| "Operation not supported"| -2147483392| -|TSDB_CODE_COM_MEMORY_CORRUPTED |0| 0x0101 | "Memory corrupted"| -2147483391| -|TSDB_CODE_COM_OUT_OF_MEMORY| 0| 0x0102| "Out of memory"| -2147483390| -|TSDB_CODE_COM_INVALID_CFG_MSG| 0 | 0x0103| "Invalid config message"| -2147483389| -|TSDB_CODE_COM_FILE_CORRUPTED| 0| 0x0104| "Data file corrupted" |-2147483388| -|TSDB_CODE_TSC_INVALID_SQL| 0| 0x0200 | "Invalid SQL statement"| -2147483136| -|TSDB_CODE_TSC_INVALID_QHANDLE| 0 | 0x0201 | "Invalid qhandle"| -2147483135| -|TSDB_CODE_TSC_INVALID_TIME_STAMP| 0 | 0x0202 | "Invalid combination of client/service 
time"| -2147483134| -|TSDB_CODE_TSC_INVALID_VALUE| 0 | 0x0203| "Invalid value in client"| -2147483133| -|TSDB_CODE_TSC_INVALID_VERSION| 0 | 0x0204 | "Invalid client version" |-2147483132| -|TSDB_CODE_TSC_INVALID_IE| 0 | 0x0205 | "Invalid client ie" |-2147483131| -|TSDB_CODE_TSC_INVALID_FQDN| 0 | 0x0206| "Invalid host name"| -2147483130| -|TSDB_CODE_TSC_INVALID_USER_LENGTH| 0 | 0x0207| "Invalid user name"| -2147483129| -|TSDB_CODE_TSC_INVALID_PASS_LENGTH| 0 | 0x0208 | "Invalid password"| -2147483128| -|TSDB_CODE_TSC_INVALID_DB_LENGTH| 0 | 0x0209| "Database name too long"| -2147483127| -|TSDB_CODE_TSC_INVALID_TABLE_ID_LENGTH| 0 | 0x020A | "Table name too long"| -2147483126| -|TSDB_CODE_TSC_INVALID_CONNECTION| 0 | 0x020B| "Invalid connection"| -2147483125| -|TSDB_CODE_TSC_OUT_OF_MEMORY| 0 | 0x020C | "System out of memory" |-2147483124| -|TSDB_CODE_TSC_NO_DISKSPACE| 0 | 0x020D | "System out of disk space"| -2147483123| -|TSDB_CODE_TSC_QUERY_CACHE_ERASED| 0 | 0x020E| "Query cache erased"| -2147483122| -|TSDB_CODE_TSC_QUERY_CANCELLED| 0 | 0x020F |"Query terminated"| -2147483121| -|TSDB_CODE_TSC_SORTED_RES_TOO_MANY| 0 |0x0210 | "Result set too large to be sorted"| -2147483120| -|TSDB_CODE_TSC_APP_ERROR| 0 | 0x0211 | "Application error"| -2147483119| -|TSDB_CODE_TSC_ACTION_IN_PROGRESS| 0 |0x0212 | "Action in progress"| -2147483118| -|TSDB_CODE_TSC_DISCONNECTED| 0 | 0x0213 |"Disconnected from service" |-2147483117| -|TSDB_CODE_TSC_NO_WRITE_AUTH| 0 | 0x0214 | "No write permission" |-2147483116| -|TSDB_CODE_MND_MSG_NOT_PROCESSED| 0| 0x0300| "Message not processed"| -2147482880| -|TSDB_CODE_MND_ACTION_IN_PROGRESS| 0 | 0x0301 |"Message is progressing"| -2147482879| -|TSDB_CODE_MND_ACTION_NEED_REPROCESSED| 0 | 0x0302 |"Messag need to be reprocessed"| -2147482878| -|TSDB_CODE_MND_NO_RIGHTS| 0 | 0x0303| "Insufficient privilege for operation"| -2147482877| -|TSDB_CODE_MND_APP_ERROR| 0 | 0x0304 | "Unexpected generic error in mnode"| -2147482876| -|TSDB_CODE_MND_INVALID_CONNECTION| 0 | 0x0305 | "Invalid message connection"| -2147482875| -|TSDB_CODE_MND_INVALID_MSG_VERSION| 0 | 0x0306 | "Incompatible protocol version"| -2147482874| -|TSDB_CODE_MND_INVALID_MSG_LEN| 0| 0x0307 | "Invalid message length"| -2147482873| -|TSDB_CODE_MND_INVALID_MSG_TYPE| 0 | 0x0308 | "Invalid message type" |-2147482872| -|TSDB_CODE_MND_TOO_MANY_SHELL_CONNS| 0 |0x0309 | "Too many connections"| -2147482871| -|TSDB_CODE_MND_OUT_OF_MEMORY| 0 |0x030A | "Out of memory in mnode"| -2147482870| -|TSDB_CODE_MND_INVALID_SHOWOBJ| 0 | 0x030B |"Data expired"| -2147482869| -|TSDB_CODE_MND_INVALID_QUERY_ID |0 | 0x030C |"Invalid query id" |-2147482868| -|TSDB_CODE_MND_INVALID_STREAM_ID| 0 |0x030D | "Invalid stream id"| -2147482867| -|TSDB_CODE_MND_INVALID_CONN_ID| 0| 0x030E | "Invalid connection id" |-2147482866| -|TSDB_CODE_MND_SDB_OBJ_ALREADY_THERE| 0 | 0x0320| "Object already there"| -2147482848| -|TSDB_CODE_MND_SDB_ERROR| 0 |0x0321 | "Unexpected generic error in sdb" |-2147482847| -|TSDB_CODE_MND_SDB_INVALID_TABLE_TYPE| 0 | 0x0322| "Invalid table type" |-2147482846| -|TSDB_CODE_MND_SDB_OBJ_NOT_THERE| 0 | 0x0323 |"Object not there" |-2147482845| -|TSDB_CODE_MND_SDB_INVAID_META_ROW| 0 | 0x0324| "Invalid meta row" |-2147482844| -|TSDB_CODE_MND_SDB_INVAID_KEY_TYPE| 0 | 0x0325 |"Invalid key type" |-2147482843| -|TSDB_CODE_MND_DNODE_ALREADY_EXIST| 0 | 0x0330 | "DNode already exists"| -2147482832| -|TSDB_CODE_MND_DNODE_NOT_EXIST| 0 | 0x0331| "DNode does not exist" |-2147482831| -|TSDB_CODE_MND_VGROUP_NOT_EXIST| 0 | 0x0332 |"VGroup does not exist"| 
-2147482830| -|TSDB_CODE_MND_NO_REMOVE_MASTER |0 | 0x0333 | "Master DNode cannot be removed"| -2147482829| -|TSDB_CODE_MND_NO_ENOUGH_DNODES |0 | 0x0334| "Out of DNodes"| -2147482828| -|TSDB_CODE_MND_CLUSTER_CFG_INCONSISTENT |0 | 0x0335 | "Cluster cfg inconsistent"| -2147482827| -|TSDB_CODE_MND_INVALID_DNODE_CFG_OPTION| 0 | 0x0336 | "Invalid dnode cfg option"| -2147482826| -|TSDB_CODE_MND_BALANCE_ENABLED| 0 | 0x0337 | "Balance already enabled" |-2147482825| -|TSDB_CODE_MND_VGROUP_NOT_IN_DNODE| 0 |0x0338 | "Vgroup not in dnode"| -2147482824| -|TSDB_CODE_MND_VGROUP_ALREADY_IN_DNODE| 0 | 0x0339 | "Vgroup already in dnode"| -2147482823| -|TSDB_CODE_MND_DNODE_NOT_FREE |0 | 0x033A |"Dnode not avaliable"| -2147482822| -|TSDB_CODE_MND_INVALID_CLUSTER_ID |0 |0x033B | "Cluster id not match"| -2147482821| -|TSDB_CODE_MND_NOT_READY| 0 | 0x033C |"Cluster not ready"| -2147482820| -|TSDB_CODE_MND_ACCT_ALREADY_EXIST| 0 | 0x0340 | "Account already exists" |-2147482816| -|TSDB_CODE_MND_INVALID_ACCT| 0 | 0x0341| "Invalid account"| -2147482815| -|TSDB_CODE_MND_INVALID_ACCT_OPTION| 0 | 0x0342 | "Invalid account options"| -2147482814| -|TSDB_CODE_MND_USER_ALREADY_EXIST| 0 | 0x0350 | "User already exists"| -2147482800| -|TSDB_CODE_MND_INVALID_USER |0 | 0x0351 | "Invalid user" |-2147482799| -|TSDB_CODE_MND_INVALID_USER_FORMAT| 0 |0x0352 |"Invalid user format" |-2147482798| -|TSDB_CODE_MND_INVALID_PASS_FORMAT| 0| 0x0353 | "Invalid password format"| -2147482797| -|TSDB_CODE_MND_NO_USER_FROM_CONN| 0 | 0x0354 | "Can not get user from conn"| -2147482796| -|TSDB_CODE_MND_TOO_MANY_USERS| 0 | 0x0355| "Too many users"| -2147482795| -|TSDB_CODE_MND_TABLE_ALREADY_EXIST| 0| 0x0360| "Table already exists"| -2147482784| -|TSDB_CODE_MND_INVALID_TABLE_ID| 0| 0x0361| "Table name too long"| -2147482783| -|TSDB_CODE_MND_INVALID_TABLE_NAME| 0| 0x0362 | "Table does not exist"| -2147482782| -|TSDB_CODE_MND_INVALID_TABLE_TYPE| 0| 0x0363 | "Invalid table type in tsdb"| -2147482781| -|TSDB_CODE_MND_TOO_MANY_TAGS| 0 | 0x0364| "Too many tags"| -2147482780| -|TSDB_CODE_MND_TOO_MANY_TIMESERIES| 0| 0x0366| "Too many time series"| -2147482778| -|TSDB_CODE_MND_NOT_SUPER_TABLE| 0 |0x0367| "Not super table"| -2147482777| -|TSDB_CODE_MND_COL_NAME_TOO_LONG| 0| 0x0368| "Tag name too long"| -2147482776| -|TSDB_CODE_MND_TAG_ALREAY_EXIST| 0| 0x0369| "Tag already exists"| -2147482775| -|TSDB_CODE_MND_TAG_NOT_EXIST| 0 |0x036A | "Tag does not exist" |-2147482774| -|TSDB_CODE_MND_FIELD_ALREAY_EXIST| 0 | 0x036B| "Field already exists"| -2147482773| -|TSDB_CODE_MND_FIELD_NOT_EXIST| 0 | 0x036C | "Field does not exist"| -2147482772| -|TSDB_CODE_MND_INVALID_STABLE_NAME |0 | 0x036D |"Super table does not exist" |-2147482771| -|TSDB_CODE_MND_DB_NOT_SELECTED| 0 | 0x0380 | "Database not specified or available"| -2147482752| -|TSDB_CODE_MND_DB_ALREADY_EXIST| 0 | 0x0381 | "Database already exists"| -2147482751| -|TSDB_CODE_MND_INVALID_DB_OPTION| 0 | 0x0382 | "Invalid database options"| -2147482750| -|TSDB_CODE_MND_INVALID_DB| 0 | 0x0383 | "Invalid database name"| -2147482749| -|TSDB_CODE_MND_MONITOR_DB_FORBIDDEN| 0 | 0x0384 | "Cannot delete monitor database"| -2147482748| -|TSDB_CODE_MND_TOO_MANY_DATABASES| 0| 0x0385 | "Too many databases for account"| -2147482747| -|TSDB_CODE_MND_DB_IN_DROPPING| 0 | 0x0386| "Database not available" |-2147482746| -|TSDB_CODE_DND_MSG_NOT_PROCESSED| 0| 0x0400 | "Message not processed"| -2147482624| -|TSDB_CODE_DND_OUT_OF_MEMORY |0 | 0x0401 | "Dnode out of memory"| -2147482623| -|TSDB_CODE_DND_NO_WRITE_ACCESS| 0 | 0x0402 | "No permission 
for disk files in dnode"| -2147482622| -|TSDB_CODE_DND_INVALID_MSG_LEN| 0 | 0x0403 | "Invalid message length"| -2147482621| -|TSDB_CODE_VND_ACTION_IN_PROGRESS |0 |0x0500| "Action in progress" |-2147482368| -|TSDB_CODE_VND_MSG_NOT_PROCESSED| 0 |0x0501 | "Message not processed" |-2147482367| -|TSDB_CODE_VND_ACTION_NEED_REPROCESSED |0 |0x0502| "Action need to be reprocessed"| -2147482366| -|TSDB_CODE_VND_INVALID_VGROUP_ID |0 | 0x0503| "Invalid Vgroup ID"| -2147482365| -|TSDB_CODE_VND_INIT_FAILED| 0 | 0x0504 | "Vnode initialization failed"| -2147482364| -|TSDB_CODE_VND_NO_DISKSPACE| 0 |0x0505| "System out of disk space" |-2147482363| -|TSDB_CODE_VND_NO_DISK_PERMISSIONS| 0 | 0x0506| "No write permission for disk files" |-2147482362| -|TSDB_CODE_VND_NO_SUCH_FILE_OR_DIR| 0 | 0x0507 | "Missing data file"| -2147482361| -|TSDB_CODE_VND_OUT_OF_MEMORY |0| 0x0508 | "Out of memory"| -2147482360| -|TSDB_CODE_VND_APP_ERROR| 0| 0x0509 | "Unexpected generic error in vnode"| -2147482359| -|TSDB_CODE_VND_INVALID_STATUS |0| 0x0510 | "Database not ready"| -2147482352| -|TSDB_CODE_VND_NOT_SYNCED| 0 | 0x0511 | "Database suspended"| -2147482351| -|TSDB_CODE_VND_NO_WRITE_AUTH| 0 | 0x0512| "Write operation denied" |-2147482350| -|TSDB_CODE_TDB_INVALID_TABLE_ID |0 | 0x0600 | "Invalid table ID"| -2147482112| -|TSDB_CODE_TDB_INVALID_TABLE_TYPE| 0| 0x0601 |"Invalid table type"| -2147482111| -|TSDB_CODE_TDB_IVD_TB_SCHEMA_VERSION| 0| 0x0602| "Invalid table schema version"| -2147482110| -|TSDB_CODE_TDB_TABLE_ALREADY_EXIST| 0 | 0x0603| "Table already exists"| -2147482109| -|TSDB_CODE_TDB_INVALID_CONFIG| 0 | 0x0604| "Invalid configuration"| -2147482108| -|TSDB_CODE_TDB_INIT_FAILED| 0 | 0x0605| "Tsdb init failed"| -2147482107| -|TSDB_CODE_TDB_NO_DISKSPACE| 0 | 0x0606| "No diskspace for tsdb"| -2147482106| -|TSDB_CODE_TDB_NO_DISK_PERMISSIONS| 0 | 0x0607| "No permission for disk files"| -2147482105| -|TSDB_CODE_TDB_FILE_CORRUPTED| 0 | 0x0608| "Data file(s) corrupted"| -2147482104| -|TSDB_CODE_TDB_OUT_OF_MEMORY| 0 | 0x0609| "Out of memory"| -2147482103| -|TSDB_CODE_TDB_TAG_VER_OUT_OF_DATE| 0 | 0x060A| "Tag too old"| -2147482102| -|TSDB_CODE_TDB_TIMESTAMP_OUT_OF_RANGE |0| 0x060B | "Timestamp data out of range"| -2147482101| -|TSDB_CODE_TDB_SUBMIT_MSG_MSSED_UP| 0| 0x060C| "Submit message is messed up"| -2147482100| -|TSDB_CODE_TDB_INVALID_ACTION| 0 | 0x060D | "Invalid operation"| -2147482099| -|TSDB_CODE_TDB_INVALID_CREATE_TB_MSG| 0 | 0x060E| "Invalid creation of table"| -2147482098| -|TSDB_CODE_TDB_NO_TABLE_DATA_IN_MEM| 0 | 0x060F| "No table data in memory skiplist" |-2147482097| -|TSDB_CODE_TDB_FILE_ALREADY_EXISTS| 0 | 0x0610| "File already exists"| -2147482096| -|TSDB_CODE_TDB_TABLE_RECONFIGURE| 0 | 0x0611| "Need to reconfigure table"| -2147482095| -|TSDB_CODE_TDB_IVD_CREATE_TABLE_INFO| 0 | 0x0612| "Invalid information to create table"| -2147482094| -|TSDB_CODE_QRY_INVALID_QHANDLE| 0 | 0x0700| "Invalid handle"| -2147481856| -|TSDB_CODE_QRY_INVALID_MSG| 0 | 0x0701| "Invalid message"| -2147481855| -|TSDB_CODE_QRY_NO_DISKSPACE| 0 | 0x0702 | "No diskspace for query"| -2147481854| -|TSDB_CODE_QRY_OUT_OF_MEMORY| 0 | 0x0703 | "System out of memory"| -2147481853| -|TSDB_CODE_QRY_APP_ERROR| 0 | 0x0704 | "Unexpected generic error in query"| -2147481852| -|TSDB_CODE_QRY_DUP_JOIN_KEY| 0 | 0x0705| "Duplicated join key"| -2147481851| -|TSDB_CODE_QRY_EXCEED_TAGS_LIMIT| 0 | 0x0706 | "Tag conditon too many"| -2147481850| -|TSDB_CODE_QRY_NOT_READY |0| 0x0707 | "Query not ready" |-2147481849| -|TSDB_CODE_QRY_HAS_RSP| 0 | 0x0708| "Query should 
response"| -2147481848| -|TSDB_CODE_GRANT_EXPIRED| 0 | 0x0800| "License expired"| -2147481600| -|TSDB_CODE_GRANT_DNODE_LIMITED| 0 | 0x0801 | "DNode creation limited by licence"| -2147481599| -|TSDB_CODE_GRANT_ACCT_LIMITED |0| 0x0802 |"Account creation limited by license"| -2147481598| -|TSDB_CODE_GRANT_TIMESERIES_LIMITED| 0 | 0x0803 | "Table creation limited by license"| -2147481597| -|TSDB_CODE_GRANT_DB_LIMITED| 0 | 0x0804 | "DB creation limited by license"| -2147481596| -|TSDB_CODE_GRANT_USER_LIMITED| 0 | 0x0805 | "User creation limited by license"| -2147481595| -|TSDB_CODE_GRANT_CONN_LIMITED| 0| 0x0806 | "Conn creation limited by license" |-2147481594| -|TSDB_CODE_GRANT_STREAM_LIMITED| 0 | 0x0807 | "Stream creation limited by license"| -2147481593| -|TSDB_CODE_GRANT_SPEED_LIMITED| 0 | 0x0808 | "Write speed limited by license" |-2147481592| -|TSDB_CODE_GRANT_STORAGE_LIMITED| 0 |0x0809 | "Storage capacity limited by license"| -2147481591| -|TSDB_CODE_GRANT_QUERYTIME_LIMITED| 0 | 0x080A | "Query time limited by license" |-2147481590| -|TSDB_CODE_GRANT_CPU_LIMITED| 0 |0x080B |"CPU cores limited by license"| -2147481589| -|TSDB_CODE_SYN_INVALID_CONFIG| 0 | 0x0900| "Invalid Sync Configuration"| -2147481344| -|TSDB_CODE_SYN_NOT_ENABLED| 0 | 0x0901 | "Sync module not enabled" |-2147481343| -|TSDB_CODE_WAL_APP_ERROR| 0| 0x1000 | "Unexpected generic error in wal" |-2147479552| \ No newline at end of file +| 状态码 | 模 | 错误码(十六进制) | 错误描述 | 错误码(十进制) | +| :-------------------------------------- | :--: | :----------------: | :------------------------------------------- | :--------------- | +| TSDB_CODE_RPC_ACTION_IN_PROGRESS | 0 | 0x0001 | "Action in progress" | -2147483647 | +| TSDB_CODE_RPC_AUTH_REQUIRED | 0 | 0x0002 | "Authentication required" | -2147483646 | +| TSDB_CODE_RPC_AUTH_FAILURE | 0 | 0x0003 | "Authentication failure" | -2147483645 | +| TSDB_CODE_RPC_REDIRECT | 0 | 0x0004 | "Redirect" | -2147483644 | +| TSDB_CODE_RPC_NOT_READY | 0 | 0x0005 | "System not ready" | -2147483643 | +| TSDB_CODE_RPC_ALREADY_PROCESSED | 0 | 0x0006 | "Message already processed" | -2147483642 | +| TSDB_CODE_RPC_LAST_SESSION_NOT_FINISHED | 0 | 0x0007 | "Last session not finished" | -2147483641 | +| TSDB_CODE_RPC_MISMATCHED_LINK_ID | 0 | 0x0008 | "Mismatched meter id" | -2147483640 | +| TSDB_CODE_RPC_TOO_SLOW | 0 | 0x0009 | "Processing of request timed out" | -2147483639 | +| TSDB_CODE_RPC_MAX_SESSIONS | 0 | 0x000A | "Number of sessions reached limit" | -2147483638 | +| TSDB_CODE_RPC_NETWORK_UNAVAIL | 0 | 0x000B | "Unable to establish connection" | -2147483637 | +| TSDB_CODE_RPC_APP_ERROR | 0 | 0x000C | "Unexpected generic error in RPC" | -2147483636 | +| TSDB_CODE_RPC_UNEXPECTED_RESPONSE | 0 | 0x000D | "Unexpected response" | -2147483635 | +| TSDB_CODE_RPC_INVALID_VALUE | 0 | 0x000E | "Invalid value" | -2147483634 | +| TSDB_CODE_RPC_INVALID_TRAN_ID | 0 | 0x000F | "Invalid transaction id" | -2147483633 | +| TSDB_CODE_RPC_INVALID_SESSION_ID | 0 | 0x0010 | "Invalid session id" | -2147483632 | +| TSDB_CODE_RPC_INVALID_MSG_TYPE | 0 | 0x0011 | "Invalid message type" | -2147483631 | +| TSDB_CODE_RPC_INVALID_RESPONSE_TYPE | 0 | 0x0012 | "Invalid response type" | -2147483630 | +| TSDB_CODE_RPC_INVALID_TIME_STAMP | 0 | 0x0013 | "Invalid timestamp" | -2147483629 | +| TSDB_CODE_COM_OPS_NOT_SUPPORT | 0 | 0x0100 | "Operation not supported" | -2147483392 | +| TSDB_CODE_COM_MEMORY_CORRUPTED | 0 | 0x0101 | "Memory corrupted" | -2147483391 | +| TSDB_CODE_COM_OUT_OF_MEMORY | 0 | 0x0102 | "Out of memory" | -2147483390 | +| 
TSDB_CODE_COM_INVALID_CFG_MSG | 0 | 0x0103 | "Invalid config message" | -2147483389 | +| TSDB_CODE_COM_FILE_CORRUPTED | 0 | 0x0104 | "Data file corrupted" | -2147483388 | +| TSDB_CODE_TSC_INVALID_OPERATION | 0 | 0x0200 | "Invalid SQL statement" | -2147483136 | +| TSDB_CODE_TSC_INVALID_QHANDLE | 0 | 0x0201 | "Invalid qhandle" | -2147483135 | +| TSDB_CODE_TSC_INVALID_TIME_STAMP | 0 | 0x0202 | "Invalid combination of client/service time" | -2147483134 | +| TSDB_CODE_TSC_INVALID_VALUE | 0 | 0x0203 | "Invalid value in client" | -2147483133 | +| TSDB_CODE_TSC_INVALID_VERSION | 0 | 0x0204 | "Invalid client version" | -2147483132 | +| TSDB_CODE_TSC_INVALID_IE | 0 | 0x0205 | "Invalid client ie" | -2147483131 | +| TSDB_CODE_TSC_INVALID_FQDN | 0 | 0x0206 | "Invalid host name" | -2147483130 | +| TSDB_CODE_TSC_INVALID_USER_LENGTH | 0 | 0x0207 | "Invalid user name" | -2147483129 | +| TSDB_CODE_TSC_INVALID_PASS_LENGTH | 0 | 0x0208 | "Invalid password" | -2147483128 | +| TSDB_CODE_TSC_INVALID_DB_LENGTH | 0 | 0x0209 | "Database name too long" | -2147483127 | +| TSDB_CODE_TSC_INVALID_TABLE_ID_LENGTH | 0 | 0x020A | "Table name too long" | -2147483126 | +| TSDB_CODE_TSC_INVALID_CONNECTION | 0 | 0x020B | "Invalid connection" | -2147483125 | +| TSDB_CODE_TSC_OUT_OF_MEMORY | 0 | 0x020C | "System out of memory" | -2147483124 | +| TSDB_CODE_TSC_NO_DISKSPACE | 0 | 0x020D | "System out of disk space" | -2147483123 | +| TSDB_CODE_TSC_QUERY_CACHE_ERASED | 0 | 0x020E | "Query cache erased" | -2147483122 | +| TSDB_CODE_TSC_QUERY_CANCELLED | 0 | 0x020F | "Query terminated" | -2147483121 | +| TSDB_CODE_TSC_SORTED_RES_TOO_MANY | 0 | 0x0210 | "Result set too large to be sorted" | -2147483120 | +| TSDB_CODE_TSC_APP_ERROR | 0 | 0x0211 | "Application error" | -2147483119 | +| TSDB_CODE_TSC_ACTION_IN_PROGRESS | 0 | 0x0212 | "Action in progress" | -2147483118 | +| TSDB_CODE_TSC_DISCONNECTED | 0 | 0x0213 | "Disconnected from service" | -2147483117 | +| TSDB_CODE_TSC_NO_WRITE_AUTH | 0 | 0x0214 | "No write permission" | -2147483116 | +| TSDB_CODE_MND_MSG_NOT_PROCESSED | 0 | 0x0300 | "Message not processed" | -2147482880 | +| TSDB_CODE_MND_ACTION_IN_PROGRESS | 0 | 0x0301 | "Message is progressing" | -2147482879 | +| TSDB_CODE_MND_ACTION_NEED_REPROCESSED | 0 | 0x0302 | "Messag need to be reprocessed" | -2147482878 | +| TSDB_CODE_MND_NO_RIGHTS | 0 | 0x0303 | "Insufficient privilege for operation" | -2147482877 | +| TSDB_CODE_MND_APP_ERROR | 0 | 0x0304 | "Unexpected generic error in mnode" | -2147482876 | +| TSDB_CODE_MND_INVALID_CONNECTION | 0 | 0x0305 | "Invalid message connection" | -2147482875 | +| TSDB_CODE_MND_INVALID_MSG_VERSION | 0 | 0x0306 | "Incompatible protocol version" | -2147482874 | +| TSDB_CODE_MND_INVALID_MSG_LEN | 0 | 0x0307 | "Invalid message length" | -2147482873 | +| TSDB_CODE_MND_INVALID_MSG_TYPE | 0 | 0x0308 | "Invalid message type" | -2147482872 | +| TSDB_CODE_MND_TOO_MANY_SHELL_CONNS | 0 | 0x0309 | "Too many connections" | -2147482871 | +| TSDB_CODE_MND_OUT_OF_MEMORY | 0 | 0x030A | "Out of memory in mnode" | -2147482870 | +| TSDB_CODE_MND_INVALID_SHOWOBJ | 0 | 0x030B | "Data expired" | -2147482869 | +| TSDB_CODE_MND_INVALID_QUERY_ID | 0 | 0x030C | "Invalid query id" | -2147482868 | +| TSDB_CODE_MND_INVALID_STREAM_ID | 0 | 0x030D | "Invalid stream id" | -2147482867 | +| TSDB_CODE_MND_INVALID_CONN_ID | 0 | 0x030E | "Invalid connection id" | -2147482866 | +| TSDB_CODE_MND_SDB_OBJ_ALREADY_THERE | 0 | 0x0320 | "Object already there" | -2147482848 | +| TSDB_CODE_MND_SDB_ERROR | 0 | 0x0321 | "Unexpected generic error 
in sdb" | -2147482847 | +| TSDB_CODE_MND_SDB_INVALID_TABLE_TYPE | 0 | 0x0322 | "Invalid table type" | -2147482846 | +| TSDB_CODE_MND_SDB_OBJ_NOT_THERE | 0 | 0x0323 | "Object not there" | -2147482845 | +| TSDB_CODE_MND_SDB_INVAID_META_ROW | 0 | 0x0324 | "Invalid meta row" | -2147482844 | +| TSDB_CODE_MND_SDB_INVAID_KEY_TYPE | 0 | 0x0325 | "Invalid key type" | -2147482843 | +| TSDB_CODE_MND_DNODE_ALREADY_EXIST | 0 | 0x0330 | "DNode already exists" | -2147482832 | +| TSDB_CODE_MND_DNODE_NOT_EXIST | 0 | 0x0331 | "DNode does not exist" | -2147482831 | +| TSDB_CODE_MND_VGROUP_NOT_EXIST | 0 | 0x0332 | "VGroup does not exist" | -2147482830 | +| TSDB_CODE_MND_NO_REMOVE_MASTER | 0 | 0x0333 | "Master DNode cannot be removed" | -2147482829 | +| TSDB_CODE_MND_NO_ENOUGH_DNODES | 0 | 0x0334 | "Out of DNodes" | -2147482828 | +| TSDB_CODE_MND_CLUSTER_CFG_INCONSISTENT | 0 | 0x0335 | "Cluster cfg inconsistent" | -2147482827 | +| TSDB_CODE_MND_INVALID_DNODE_CFG_OPTION | 0 | 0x0336 | "Invalid dnode cfg option" | -2147482826 | +| TSDB_CODE_MND_BALANCE_ENABLED | 0 | 0x0337 | "Balance already enabled" | -2147482825 | +| TSDB_CODE_MND_VGROUP_NOT_IN_DNODE | 0 | 0x0338 | "Vgroup not in dnode" | -2147482824 | +| TSDB_CODE_MND_VGROUP_ALREADY_IN_DNODE | 0 | 0x0339 | "Vgroup already in dnode" | -2147482823 | +| TSDB_CODE_MND_DNODE_NOT_FREE | 0 | 0x033A | "Dnode not avaliable" | -2147482822 | +| TSDB_CODE_MND_INVALID_CLUSTER_ID | 0 | 0x033B | "Cluster id not match" | -2147482821 | +| TSDB_CODE_MND_NOT_READY | 0 | 0x033C | "Cluster not ready" | -2147482820 | +| TSDB_CODE_MND_ACCT_ALREADY_EXIST | 0 | 0x0340 | "Account already exists" | -2147482816 | +| TSDB_CODE_MND_INVALID_ACCT | 0 | 0x0341 | "Invalid account" | -2147482815 | +| TSDB_CODE_MND_INVALID_ACCT_OPTION | 0 | 0x0342 | "Invalid account options" | -2147482814 | +| TSDB_CODE_MND_USER_ALREADY_EXIST | 0 | 0x0350 | "User already exists" | -2147482800 | +| TSDB_CODE_MND_INVALID_USER | 0 | 0x0351 | "Invalid user" | -2147482799 | +| TSDB_CODE_MND_INVALID_USER_FORMAT | 0 | 0x0352 | "Invalid user format" | -2147482798 | +| TSDB_CODE_MND_INVALID_PASS_FORMAT | 0 | 0x0353 | "Invalid password format" | -2147482797 | +| TSDB_CODE_MND_NO_USER_FROM_CONN | 0 | 0x0354 | "Can not get user from conn" | -2147482796 | +| TSDB_CODE_MND_TOO_MANY_USERS | 0 | 0x0355 | "Too many users" | -2147482795 | +| TSDB_CODE_MND_TABLE_ALREADY_EXIST | 0 | 0x0360 | "Table already exists" | -2147482784 | +| TSDB_CODE_MND_INVALID_TABLE_ID | 0 | 0x0361 | "Table name too long" | -2147482783 | +| TSDB_CODE_MND_INVALID_TABLE_NAME | 0 | 0x0362 | "Table does not exist" | -2147482782 | +| TSDB_CODE_MND_INVALID_TABLE_TYPE | 0 | 0x0363 | "Invalid table type in tsdb" | -2147482781 | +| TSDB_CODE_MND_TOO_MANY_TAGS | 0 | 0x0364 | "Too many tags" | -2147482780 | +| TSDB_CODE_MND_TOO_MANY_TIMESERIES | 0 | 0x0366 | "Too many time series" | -2147482778 | +| TSDB_CODE_MND_NOT_SUPER_TABLE | 0 | 0x0367 | "Not super table" | -2147482777 | +| TSDB_CODE_MND_COL_NAME_TOO_LONG | 0 | 0x0368 | "Tag name too long" | -2147482776 | +| TSDB_CODE_MND_TAG_ALREAY_EXIST | 0 | 0x0369 | "Tag already exists" | -2147482775 | +| TSDB_CODE_MND_TAG_NOT_EXIST | 0 | 0x036A | "Tag does not exist" | -2147482774 | +| TSDB_CODE_MND_FIELD_ALREAY_EXIST | 0 | 0x036B | "Field already exists" | -2147482773 | +| TSDB_CODE_MND_FIELD_NOT_EXIST | 0 | 0x036C | "Field does not exist" | -2147482772 | +| TSDB_CODE_MND_INVALID_STABLE_NAME | 0 | 0x036D | "Super table does not exist" | -2147482771 | +| TSDB_CODE_MND_DB_NOT_SELECTED | 0 | 0x0380 | "Database not 
specified or available" | -2147482752 | +| TSDB_CODE_MND_DB_ALREADY_EXIST | 0 | 0x0381 | "Database already exists" | -2147482751 | +| TSDB_CODE_MND_INVALID_DB_OPTION | 0 | 0x0382 | "Invalid database options" | -2147482750 | +| TSDB_CODE_MND_INVALID_DB | 0 | 0x0383 | "Invalid database name" | -2147482749 | +| TSDB_CODE_MND_MONITOR_DB_FORBIDDEN | 0 | 0x0384 | "Cannot delete monitor database" | -2147482748 | +| TSDB_CODE_MND_TOO_MANY_DATABASES | 0 | 0x0385 | "Too many databases for account" | -2147482747 | +| TSDB_CODE_MND_DB_IN_DROPPING | 0 | 0x0386 | "Database not available" | -2147482746 | +| TSDB_CODE_DND_MSG_NOT_PROCESSED | 0 | 0x0400 | "Message not processed" | -2147482624 | +| TSDB_CODE_DND_OUT_OF_MEMORY | 0 | 0x0401 | "Dnode out of memory" | -2147482623 | +| TSDB_CODE_DND_NO_WRITE_ACCESS | 0 | 0x0402 | "No permission for disk files in dnode" | -2147482622 | +| TSDB_CODE_DND_INVALID_MSG_LEN | 0 | 0x0403 | "Invalid message length" | -2147482621 | +| TSDB_CODE_VND_ACTION_IN_PROGRESS | 0 | 0x0500 | "Action in progress" | -2147482368 | +| TSDB_CODE_VND_MSG_NOT_PROCESSED | 0 | 0x0501 | "Message not processed" | -2147482367 | +| TSDB_CODE_VND_ACTION_NEED_REPROCESSED | 0 | 0x0502 | "Action need to be reprocessed" | -2147482366 | +| TSDB_CODE_VND_INVALID_VGROUP_ID | 0 | 0x0503 | "Invalid Vgroup ID" | -2147482365 | +| TSDB_CODE_VND_INIT_FAILED | 0 | 0x0504 | "Vnode initialization failed" | -2147482364 | +| TSDB_CODE_VND_NO_DISKSPACE | 0 | 0x0505 | "System out of disk space" | -2147482363 | +| TSDB_CODE_VND_NO_DISK_PERMISSIONS | 0 | 0x0506 | "No write permission for disk files" | -2147482362 | +| TSDB_CODE_VND_NO_SUCH_FILE_OR_DIR | 0 | 0x0507 | "Missing data file" | -2147482361 | +| TSDB_CODE_VND_OUT_OF_MEMORY | 0 | 0x0508 | "Out of memory" | -2147482360 | +| TSDB_CODE_VND_APP_ERROR | 0 | 0x0509 | "Unexpected generic error in vnode" | -2147482359 | +| TSDB_CODE_VND_INVALID_STATUS | 0 | 0x0510 | "Database not ready" | -2147482352 | +| TSDB_CODE_VND_NOT_SYNCED | 0 | 0x0511 | "Database suspended" | -2147482351 | +| TSDB_CODE_VND_NO_WRITE_AUTH | 0 | 0x0512 | "Write operation denied" | -2147482350 | +| TSDB_CODE_TDB_INVALID_TABLE_ID | 0 | 0x0600 | "Invalid table ID" | -2147482112 | +| TSDB_CODE_TDB_INVALID_TABLE_TYPE | 0 | 0x0601 | "Invalid table type" | -2147482111 | +| TSDB_CODE_TDB_IVD_TB_SCHEMA_VERSION | 0 | 0x0602 | "Invalid table schema version" | -2147482110 | +| TSDB_CODE_TDB_TABLE_ALREADY_EXIST | 0 | 0x0603 | "Table already exists" | -2147482109 | +| TSDB_CODE_TDB_INVALID_CONFIG | 0 | 0x0604 | "Invalid configuration" | -2147482108 | +| TSDB_CODE_TDB_INIT_FAILED | 0 | 0x0605 | "Tsdb init failed" | -2147482107 | +| TSDB_CODE_TDB_NO_DISKSPACE | 0 | 0x0606 | "No diskspace for tsdb" | -2147482106 | +| TSDB_CODE_TDB_NO_DISK_PERMISSIONS | 0 | 0x0607 | "No permission for disk files" | -2147482105 | +| TSDB_CODE_TDB_FILE_CORRUPTED | 0 | 0x0608 | "Data file(s) corrupted" | -2147482104 | +| TSDB_CODE_TDB_OUT_OF_MEMORY | 0 | 0x0609 | "Out of memory" | -2147482103 | +| TSDB_CODE_TDB_TAG_VER_OUT_OF_DATE | 0 | 0x060A | "Tag too old" | -2147482102 | +| TSDB_CODE_TDB_TIMESTAMP_OUT_OF_RANGE | 0 | 0x060B | "Timestamp data out of range" | -2147482101 | +| TSDB_CODE_TDB_SUBMIT_MSG_MSSED_UP | 0 | 0x060C | "Submit message is messed up" | -2147482100 | +| TSDB_CODE_TDB_INVALID_ACTION | 0 | 0x060D | "Invalid operation" | -2147482099 | +| TSDB_CODE_TDB_INVALID_CREATE_TB_MSG | 0 | 0x060E | "Invalid creation of table" | -2147482098 | +| TSDB_CODE_TDB_NO_TABLE_DATA_IN_MEM | 0 | 0x060F | "No table data in memory 
skiplist" | -2147482097 | +| TSDB_CODE_TDB_FILE_ALREADY_EXISTS | 0 | 0x0610 | "File already exists" | -2147482096 | +| TSDB_CODE_TDB_TABLE_RECONFIGURE | 0 | 0x0611 | "Need to reconfigure table" | -2147482095 | +| TSDB_CODE_TDB_IVD_CREATE_TABLE_INFO | 0 | 0x0612 | "Invalid information to create table" | -2147482094 | +| TSDB_CODE_QRY_INVALID_QHANDLE | 0 | 0x0700 | "Invalid handle" | -2147481856 | +| TSDB_CODE_QRY_INVALID_MSG | 0 | 0x0701 | "Invalid message" | -2147481855 | +| TSDB_CODE_QRY_NO_DISKSPACE | 0 | 0x0702 | "No diskspace for query" | -2147481854 | +| TSDB_CODE_QRY_OUT_OF_MEMORY | 0 | 0x0703 | "System out of memory" | -2147481853 | +| TSDB_CODE_QRY_APP_ERROR | 0 | 0x0704 | "Unexpected generic error in query" | -2147481852 | +| TSDB_CODE_QRY_DUP_JOIN_KEY | 0 | 0x0705 | "Duplicated join key" | -2147481851 | +| TSDB_CODE_QRY_EXCEED_TAGS_LIMIT | 0 | 0x0706 | "Tag conditon too many" | -2147481850 | +| TSDB_CODE_QRY_NOT_READY | 0 | 0x0707 | "Query not ready" | -2147481849 | +| TSDB_CODE_QRY_HAS_RSP | 0 | 0x0708 | "Query should response" | -2147481848 | +| TSDB_CODE_GRANT_EXPIRED | 0 | 0x0800 | "License expired" | -2147481600 | +| TSDB_CODE_GRANT_DNODE_LIMITED | 0 | 0x0801 | "DNode creation limited by licence" | -2147481599 | +| TSDB_CODE_GRANT_ACCT_LIMITED | 0 | 0x0802 | "Account creation limited by license" | -2147481598 | +| TSDB_CODE_GRANT_TIMESERIES_LIMITED | 0 | 0x0803 | "Table creation limited by license" | -2147481597 | +| TSDB_CODE_GRANT_DB_LIMITED | 0 | 0x0804 | "DB creation limited by license" | -2147481596 | +| TSDB_CODE_GRANT_USER_LIMITED | 0 | 0x0805 | "User creation limited by license" | -2147481595 | +| TSDB_CODE_GRANT_CONN_LIMITED | 0 | 0x0806 | "Conn creation limited by license" | -2147481594 | +| TSDB_CODE_GRANT_STREAM_LIMITED | 0 | 0x0807 | "Stream creation limited by license" | -2147481593 | +| TSDB_CODE_GRANT_SPEED_LIMITED | 0 | 0x0808 | "Write speed limited by license" | -2147481592 | +| TSDB_CODE_GRANT_STORAGE_LIMITED | 0 | 0x0809 | "Storage capacity limited by license" | -2147481591 | +| TSDB_CODE_GRANT_QUERYTIME_LIMITED | 0 | 0x080A | "Query time limited by license" | -2147481590 | +| TSDB_CODE_GRANT_CPU_LIMITED | 0 | 0x080B | "CPU cores limited by license" | -2147481589 | +| TSDB_CODE_SYN_INVALID_CONFIG | 0 | 0x0900 | "Invalid Sync Configuration" | -2147481344 | +| TSDB_CODE_SYN_NOT_ENABLED | 0 | 0x0901 | "Sync module not enabled" | -2147481343 | +| TSDB_CODE_WAL_APP_ERROR | 0 | 0x1000 | "Unexpected generic error in wal" | -2147479552 | diff --git a/documentation20/cn/12.taos-sql/docs.md b/documentation20/cn/12.taos-sql/docs.md index 58191e0bd8faa02b0ff2381f1cdd576b379ae9fc..4368e5fa1dfef4300fe8e1c44b47471fb55f70e1 100644 --- a/documentation20/cn/12.taos-sql/docs.md +++ b/documentation20/cn/12.taos-sql/docs.md @@ -34,40 +34,41 @@ taos> DESCRIBE meters; - 时间格式为 ```YYYY-MM-DD HH:mm:ss.MS```,默认时间分辨率为毫秒。比如:```2017-08-12 18:25:58.128``` - 内部函数 now 是客户端的当前时间 - 插入记录时,如果时间戳为 now,插入数据时使用提交这条记录的客户端的当前时间 -- Epoch Time:时间戳也可以是一个长整数,表示从 1970-01-01 08:00:00.000 开始的毫秒数 +- Epoch Time:时间戳也可以是一个长整数,表示从格林威治时间 1970-01-01 00:00:00.000 (UTC/GMT) 开始的毫秒数(相应地,如果所在 Database 的时间精度设置为“微秒”,则长整型格式的时间戳含义也就对应于从格林威治时间 1970-01-01 00:00:00.000 (UTC/GMT) 开始的微秒数) - 时间可以加减,比如 now-2h,表明查询时刻向前推 2 个小时(最近 2 小时)。数字后面的时间单位可以是 u(微秒)、a(毫秒)、s(秒)、m(分)、h(小时)、d(天)、w(周)。 比如 `select * from t1 where ts > now-2w and ts <= now-1w`,表示查询两周前整整一周的数据。在指定降频操作(down sampling)的时间窗口(interval)时,时间单位还可以使用 n(自然月) 和 y(自然年)。 -TDengine 缺省的时间戳是毫秒精度,但通过修改配置参数 enableMicrosecond 就可以支持微秒。 +TDengine 缺省的时间戳是毫秒精度,但通过在 CREATE DATABASE 时传递的 
PRECISION 参数就可以支持微秒。 在TDengine中,普通表的数据模型中可使用以下 10 种数据类型。 -| | 类型 | Bytes | 说明 | +| # | **类型** | **Bytes** | **说明** | | ---- | :-------: | ------ | ------------------------------------------------------------ | -| 1 | TIMESTAMP | 8 | 时间戳。缺省精度毫秒,可支持微秒。从格林威治时间 1970-01-01 00:00:00.000 (UTC/GMT) 开始,计时不能早于该时间。(从 2.0.18 版本开始,已经去除了这一时间范围限制) | +| 1 | TIMESTAMP | 8 | 时间戳。缺省精度毫秒,可支持微秒。从格林威治时间 1970-01-01 00:00:00.000 (UTC/GMT) 开始,计时不能早于该时间。(从 2.0.18.0 版本开始,已经去除了这一时间范围限制) | | 2 | INT | 4 | 整型,范围 [-2^31+1, 2^31-1], -2^31 用作 NULL | | 3 | BIGINT | 8 | 长整型,范围 [-2^63+1, 2^63-1], -2^63 用于 NULL | | 4 | FLOAT | 4 | 浮点型,有效位数 6-7,范围 [-3.4E38, 3.4E38] | | 5 | DOUBLE | 8 | 双精度浮点型,有效位数 15-16,范围 [-1.7E308, 1.7E308] | -| 6 | BINARY | 自定义 | 用于记录 ASCII 型字符串。理论上,最长可以有 16374 字节,但由于每行数据最多 16K 字节,实际上限一般小于理论值。 binary 仅支持字符串输入,字符串两端使用单引号引用,否则英文全部自动转化为小写。使用时须指定大小,如 binary(20) 定义了最长为 20 个字符的字符串,每个字符占 1 byte 的存储空间,此时如果用户字符串超出 20 字节将会报错。对于字符串内的单引号,可以用转义字符反斜线加单引号来表示,即 `\’`。 | +| 6 | BINARY | 自定义 | 记录单字节字符串,建议只用于处理 ASCII 可见字符,中文等多字节字符需使用 nchar。理论上,最长可以有 16374 字节,但由于每行数据最多 16K 字节,实际上限一般小于理论值。binary 仅支持字符串输入,字符串两端需使用单引号引用。使用时须指定大小,如 binary(20) 定义了最长为 20 个单字节字符的字符串,每个字符占 1 byte 的存储空间,总共固定占用 20 bytes 的空间,此时如果用户字符串超出 20 字节将会报错。对于字符串内的单引号,可以用转义字符反斜线加单引号来表示,即 `\’`。 | | 7 | SMALLINT | 2 | 短整型, 范围 [-32767, 32767], -32768 用于 NULL | | 8 | TINYINT | 1 | 单字节整型,范围 [-127, 127], -128 用于 NULL | | 9 | BOOL | 1 | 布尔型,{true, false} | -| 10 | NCHAR | 自定义 | 用于记录非 ASCII 型字符串,如中文字符。每个 nchar 字符占用 4 bytes 的存储空间。字符串两端使用单引号引用,字符串内的单引号需用转义字符 `\’`。nchar 使用时须指定字符串大小,类型为 nchar(10) 的列表示此列的字符串最多存储 10 个 nchar 字符,会固定占用 40 bytes 的空间。如果用户字符串长度超出声明长度,将会报错。 | +| 10 | NCHAR | 自定义 | 记录包含多字节字符在内的字符串,如中文字符。每个 nchar 字符占用 4 bytes 的存储空间。字符串两端使用单引号引用,字符串内的单引号需用转义字符 `\’`。nchar 使用时须指定字符串大小,类型为 nchar(10) 的列表示此列的字符串最多存储 10 个 nchar 字符,会固定占用 40 bytes 的空间。如果用户字符串长度超出声明长度,将会报错。 | + **Tips**: 1. TDengine 对 SQL 语句中的英文字符不区分大小写,自动转化为小写执行。因此用户大小写敏感的字符串及密码,需要使用单引号将字符串引起来。 -2. 应避免使用 BINARY 类型来保存非 ASCII 型的字符串,会很容易导致数据乱码等错误。正确的做法是使用 NCHAR 类型来保存中文字符。 +2. **注意**,虽然 Binary 类型在底层存储上支持字节型的二进制字符,但不同编程语言对二进制数据的处理方式并不保证一致,因此建议在 Binary 类型中只存储 ASCII 可见字符,而避免存储不可见字符。多字节的数据,例如中文字符,则需要使用 nchar 类型进行保存。如果强行使用 Binary 类型保存中文字符,虽然有时也能正常读写,但并不带有字符集信息,很容易出现数据乱码甚至数据损坏等情况。 ## 数据库管理 - **创建数据库** ```mysql - CREATE DATABASE [IF NOT EXISTS] db_name [KEEP keep] [UPDATE 1]; + CREATE DATABASE [IF NOT EXISTS] db_name [KEEP keep] [DAYS days] [UPDATE 1]; ``` - 说明: + 说明: - 1) KEEP是该数据库的数据保留多长天数,缺省是3650天(10年),数据库会自动删除超过时限的数据; + 1) KEEP是该数据库的数据保留多长天数,缺省是3650天(10年),数据库会自动删除超过时限的数据; 2) UPDATE 标志数据库支持更新相同时间戳数据; @@ -75,7 +76,7 @@ TDengine 缺省的时间戳是毫秒精度,但通过修改配置参数 enableM 4) 一条SQL 语句的最大长度为65480个字符; - 5) 数据库还有更多与存储相关的配置参数,请参见系统管理。 + 5) 数据库还有更多与存储相关的配置参数,请参见 [服务端配置](https://www.taosdata.com/cn/documentation/administrator#config) 章节。 - **显示系统当前参数** @@ -88,13 +89,13 @@ TDengine 缺省的时间戳是毫秒精度,但通过修改配置参数 enableM ```mysql USE db_name; ``` - 使用/切换数据库 + 使用/切换数据库(在 RESTful 连接方式下无效)。 - **删除数据库** ```mysql DROP DATABASE [IF EXISTS] db_name; ``` - 删除数据库。所包含的全部数据表将被删除,谨慎使用 + 删除数据库。指定 Database 所包含的全部数据表将被删除,谨慎使用! 
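+
+ 结合上面几条指令,下面给出一个示意性的操作序列,依次演示数据库的创建、切换和删除(其中库名 power 及各参数取值仅为示例,需按实际需求调整):
+ ```mysql
+ CREATE DATABASE IF NOT EXISTS power KEEP 365 DAYS 10 UPDATE 1;
+ USE power;
+ DROP DATABASE IF EXISTS power;
+ ```
+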
- **修改数据库参数** ```mysql @@ -125,9 +126,10 @@ TDengine 缺省的时间戳是毫秒精度,但通过修改配置参数 enableM ```mysql ALTER DATABASE db_name CACHELAST 0; ``` - CACHELAST 参数控制是否在内存中缓存数据子表的 last_row。缺省值为 0,取值范围 [0, 1]。其中 0 表示不启用、1 表示启用。(从 2.0.11 版本开始支持,修改后需要重启服务器生效。) + CACHELAST 参数控制是否在内存中缓存子表的最近数据。缺省值为 0,取值范围 [0, 1, 2, 3]。其中 0 表示不缓存,1 表示缓存子表最近一行数据,2 表示缓存子表每一列的最近的非 NULL 值,3 表示同时打开缓存最近行和列功能。(从 2.0.11.0 版本开始支持参数值 [0, 1],从 2.1.2.0 版本开始支持参数值 [0, 1, 2, 3]。) + 说明:缓存最近行,将显著改善 LAST_ROW 函数的性能表现;缓存每列的最近非 NULL 值,将显著改善无特殊影响(WHERE、ORDER BY、GROUP BY、INTERVAL)下的 LAST 函数的性能表现。 - **Tips**: 以上所有参数修改后都可以用show databases来确认是否修改成功。 + **Tips**: 以上所有参数修改后都可以用show databases来确认是否修改成功。另外,从 2.1.3.0 版本开始,修改这些参数后无需重启服务器即可生效。 - **显示系统所有数据库** @@ -135,6 +137,14 @@ TDengine 缺省的时间戳是毫秒精度,但通过修改配置参数 enableM SHOW DATABASES; ``` +- **显示一个数据库的创建语句** + + ```mysql + SHOW CREATE DATABASE db_name; + ``` + 常用于数据库迁移。对一个已经存在的数据库,返回其创建语句;在另一个集群中执行该语句,就能得到一个设置完全相同的 Database。 + + ## 表管理 - **创建数据表** @@ -159,22 +169,22 @@ TDengine 缺省的时间戳是毫秒精度,但通过修改配置参数 enableM ```mysql CREATE TABLE [IF NOT EXISTS] tb_name USING stb_name TAGS (tag_value1, ...); ``` - 以指定的超级表为模板,指定 tags 的值来创建数据表。 + 以指定的超级表为模板,指定 TAGS 的值来创建数据表。 -- **以超级表为模板创建数据表,并指定具体的 tags 列** +- **以超级表为模板创建数据表,并指定具体的 TAGS 列** ```mysql CREATE TABLE [IF NOT EXISTS] tb_name USING stb_name (tag_name1, ...) TAGS (tag_value1, ...); ``` - 以指定的超级表为模板,指定一部分 tags 列的值来创建数据表。(没被指定的 tags 列会设为空值。) - 说明:从 2.0.17 版本开始支持这种方式。在之前的版本中,不允许指定 tags 列,而必须显式给出所有 tags 列的取值。 + 以指定的超级表为模板,指定一部分 TAGS 列的值来创建数据表(没被指定的 TAGS 列会设为空值)。 + 说明:从 2.0.17.0 版本开始支持这种方式。在之前的版本中,不允许指定 TAGS 列,而必须显式给出所有 TAGS 列的取值。 - **批量创建数据表** ```mysql CREATE TABLE [IF NOT EXISTS] tb_name1 USING stb_name TAGS (tag_value1, ...) tb_name2 USING stb_name TAGS (tag_value2, ...) ...; ``` - 以更快的速度批量创建大量数据表。(服务器端 2.0.14 及以上版本) + 以更快的速度批量创建大量数据表(服务器端 2.0.14 及以上版本)。 说明: @@ -198,13 +208,21 @@ TDengine 缺省的时间戳是毫秒精度,但通过修改配置参数 enableM 说明:可在like中使用通配符进行名称的匹配,这一通配符字符串最长不能超过24字节。 - 通配符匹配:1)’%’ (百分号)匹配0到任意个字符;2)’\_’下划线匹配一个字符。 + 通配符匹配:1)'%'(百分号)匹配0到任意个字符;2)'\_'下划线匹配单个任意字符。 + +- **显示一个数据表的创建语句** + + ```mysql + SHOW CREATE TABLE tb_name; + ``` + 常用于数据库迁移。对一个已经存在的数据表,返回其创建语句;在另一个集群中执行该语句,就能得到一个结构完全相同的数据表。 - **在线修改显示字符宽度** ```mysql SET MAX_BINARY_DISPLAY_WIDTH ; ``` + 如显示的内容后面以...结尾时,表示该内容已被截断,可通过本命令修改显示字符宽度以显示完整的内容。 - **获取表的结构信息** @@ -221,14 +239,22 @@ TDengine 缺省的时间戳是毫秒精度,但通过修改配置参数 enableM 1) 列的最大个数为1024,最小个数为2; - 2) 列名最大长度为64; + 2) 列名最大长度为64。 - **表删除列** ```mysql ALTER TABLE tb_name DROP COLUMN field_name; ``` - 如果表是通过[超级表](../super-table/)创建,更改表结构的操作只能对超级表进行。同时针对超级表的结构更改对所有通过该结构创建的表生效。对于不是通过超级表创建的表,可以直接修改表结构 + 如果表是通过超级表创建,更改表结构的操作只能对超级表进行。同时针对超级表的结构更改对所有通过该结构创建的表生效。对于不是通过超级表创建的表,可以直接修改表结构。 + +- **表修改列宽** + + ```mysql + ALTER TABLE tb_name MODIFY COLUMN field_name data_type(length); + ``` + 如果数据列的类型是可变长格式(BINARY 或 NCHAR),那么可以使用此指令修改其宽度(只能改大,不能改小)。(2.1.3.0 版本新增) + 如果表是通过超级表创建,更改表结构的操作只能对超级表进行。同时针对超级表的结构更改对所有通过该结构创建的表生效。对于不是通过超级表创建的表,可以直接修改表结构。 ## 超级表STable管理 @@ -239,15 +265,15 @@ TDengine 缺省的时间戳是毫秒精度,但通过修改配置参数 enableM ```mysql CREATE STABLE [IF NOT EXISTS] stb_name (timestamp_field_name TIMESTAMP, field1_name data_type1 [, field2_name data_type2 ...]) TAGS (tag1_name tag_type1, tag2_name tag_type2 [, tag3_name tag_type3]); ``` - 创建 STable,与创建表的 SQL 语法相似,但需指定 TAGS 字段的名称和类型 + 创建 STable,与创建表的 SQL 语法相似,但需要指定 TAGS 字段的名称和类型 说明: - 1) TAGS 列的数据类型不能是 timestamp 类型; + 1) TAGS 列的数据类型不能是 timestamp 类型;(从 2.1.3.0 版本开始,TAGS 列中支持使用 timestamp 类型,但需注意在 TAGS 中的 timestamp 列写入数据时需要提供给定值,而暂不支持四则运算,例如 `NOW + 10s` 这类表达式) 2) TAGS 列名不能与其他列名相同; - 3) TAGS 列名不能为预留关键字; + 3) TAGS 
列名不能为预留关键字(参见:[参数限制与保留关键字](https://www.taosdata.com/cn/documentation/administrator#keywords) 章节); 4) TAGS 最多允许 128 个,至少 1 个,总长度不超过 16 KB。 @@ -261,10 +287,17 @@ TDengine 缺省的时间戳是毫秒精度,但通过修改配置参数 enableM - **显示当前数据库下的所有超级表信息** ```mysql - SHOW STABLES [LIKE tb_name_wildcar]; + SHOW STABLES [LIKE tb_name_wildcard]; ``` 查看数据库内全部 STable,及其相关信息,包括 STable 的名称、创建时间、列数量、标签(TAG)数量、通过该 STable 建表的数量。 +- **显示一个超级表的创建语句** + + ```mysql + SHOW CREATE STABLE stb_name; + ``` + 常用于数据库迁移。对一个已经存在的超级表,返回其创建语句;在另一个集群中执行该语句,就能得到一个结构完全相同的超级表。 + - **获取超级表的结构信息** ```mysql @@ -283,6 +316,13 @@ TDengine 缺省的时间戳是毫秒精度,但通过修改配置参数 enableM ALTER STABLE stb_name DROP COLUMN field_name; ``` +- **超级表修改列宽** + + ```mysql + ALTER STABLE stb_name MODIFY COLUMN field_name data_type(length); + ``` + 如果数据列的类型是可变长格式(BINARY 或 NCHAR),那么可以使用此指令修改其宽度(只能改大,不能改小)。(2.1.3.0 版本新增) + ## 超级表 STable 中 TAG 管理 - **添加标签** @@ -306,6 +346,13 @@ TDengine 缺省的时间戳是毫秒精度,但通过修改配置参数 enableM ``` 修改超级表的标签名,从超级表修改某个标签名后,该超级表下的所有子表也会自动更新该标签名。 +- **修改标签列宽度** + + ```mysql + ALTER STABLE stb_name MODIFY TAG tag_name data_type(length); + ``` + 如果标签的类型是可变长格式(BINARY 或 NCHAR),那么可以使用此指令修改其宽度(只能改大,不能改小)。(2.1.3.0 版本新增) + - **修改子表标签值** ```mysql @@ -315,62 +362,82 @@ TDengine 缺省的时间戳是毫秒精度,但通过修改配置参数 enableM ## 数据写入 -- **插入一条记录** +### 写入语法: + +```mysql +INSERT INTO + tb_name + [USING stb_name [(tag1_name, ...)] TAGS (tag1_value, ...)] + [(field1_name, ...)] + VALUES (field1_value, ...) [(field1_value2, ...) ...] | FILE csv_file_path + [tb2_name + [USING stb_name [(tag1_name, ...)] TAGS (tag1_value, ...)] + [(field1_name, ...)] + VALUES (field1_value, ...) [(field1_value2, ...) ...] | FILE csv_file_path + ...]; +``` + +### 详细描述及示例: + +- **插入一条或多条记录** + 指定已经创建好的数据子表的表名,并通过 VALUES 关键字提供一行或多行数据,即可向数据库写入这些数据。例如,执行如下语句可以写入一行记录: ```mysql - INSERT INTO tb_name VALUES (field_value, ...); + INSERT INTO d1001 VALUES (NOW, 10.2, 219, 0.32); ``` - 向表tb_name中插入一条记录 - -- **插入一条记录,数据对应到指定的列** + 或者,可以通过如下语句写入两行记录: ```mysql - INSERT INTO tb_name (field1_name, ...) VALUES (field1_value1, ...); + INSERT INTO d1001 VALUES ('2021-07-13 14:06:32.272', 10.2, 219, 0.32) (1626164208000, 10.15, 217, 0.33); ``` - 向表tb_name中插入一条记录,数据对应到指定的列。SQL语句中没有出现的列,数据库将自动填充为NULL。主键(时间戳)不能为NULL。 + **注意:** + 1)在第二个例子中,两行记录的首列时间戳使用了不同格式的写法。其中字符串格式的时间戳写法不受所在 DATABASE 的时间精度设置影响;而长整形格式的时间戳写法会受到所在 DATABASE 的时间精度设置影响——例子中的时间戳在毫秒精度下可以写作 1626164208000,而如果是在微秒精度设置下就需要写为 1626164208000000。 + 2)在使用“插入多条记录”方式写入数据时,不能把第一列的时间戳取值都设为 NOW,否则会导致语句中的多条记录使用相同的时间戳,于是就可能出现相互覆盖以致这些数据行无法全部被正确保存。其原因在于,NOW 函数在执行中会被解析为所在 SQL 语句的实际执行时间,出现在同一语句中的多个 NOW 标记也就会被替换为完全相同的时间戳取值。 + 3)允许插入的最老记录的时间戳,是相对于当前服务器时间,减去配置的 keep 值(数据保留的天数);允许插入的最新记录的时间戳,是相对于当前服务器时间,加上配置的 days 值(数据文件存储数据的时间跨度,单位为天)。keep 和 days 都是可以在创建数据库时指定的,缺省值分别是 3650 天和 10 天。 -- **插入多条记录** +- **插入记录,数据对应到指定的列** + 向数据子表中插入记录时,无论插入一行还是多行,都可以让数据对应到指定的列。对于 SQL 语句中没有出现的列,数据库将自动填充为 NULL。主键(时间戳)不能为 NULL。例如: ```mysql - INSERT INTO tb_name VALUES (field1_value1, ...) (field1_value2, ...) ...; + INSERT INTO d1001 (ts, current, phase) VALUES ('2021-07-13 14:06:33.196', 10.27, 0.31); ``` - 向表tb_name中插入多条记录 - **注意**:在使用“插入多条记录”方式写入数据时,不能把第一列的时间戳取值都设为now,否则会导致语句中的多条记录使用相同的时间戳,于是就可能出现相互覆盖以致这些数据行无法全部被正确保存。 + **说明:**如果不指定列,也即使用全列模式——那么在 VALUES 部分提供的数据,必须为数据表的每个列都显式地提供数据。全列模式写入速度会远快于指定列,因此建议尽可能采用全列写入方式,此时空列可以填入 NULL。 -- **按指定的列插入多条记录** +- **向多个表插入记录** + 可以在一条语句中,分别向多个表插入一条或多条记录,并且也可以在插入过程中指定列。例如: ```mysql - INSERT INTO tb_name (field1_name, ...) VALUES (field1_value1, ...) (field1_value2, ...) 
...; + INSERT INTO d1001 VALUES ('2021-07-13 14:06:34.630', 10.2, 219, 0.32) ('2021-07-13 14:06:35.779', 10.15, 217, 0.33) + d1002 (ts, current, phase) VALUES ('2021-07-13 14:06:34.255', 10.27, 0.31); ``` - 向表tb_name中按指定的列插入多条记录 -- **向多个表插入多条记录** +- **插入记录时自动建表** + 如果用户在写数据时并不确定某个表是否存在,此时可以在写入数据时使用自动建表语法来创建不存在的表,若该表已存在则不会建立新表。自动建表时,要求必须以超级表为模板,并写明数据表的 TAGS 取值。例如: ```mysql - INSERT INTO tb1_name VALUES (field1_value1, ...) (field1_value2, ...) ... - tb2_name VALUES (field1_value1, ...) (field1_value2, ...) ...; + INSERT INTO d21001 USING meters TAGS ('Beijing.Chaoyang', 2) VALUES ('2021-07-13 14:06:32.272', 10.2, 219, 0.32); ``` - 同时向表tb1_name和tb2_name中分别插入多条记录 - -- **同时向多个表按列插入多条记录** + 也可以在自动建表时,只是指定部分 TAGS 列的取值,未被指定的 TAGS 列将置为 NULL。例如: ```mysql - INSERT INTO tb1_name (tb1_field1_name, ...) VALUES (field1_value1, ...) (field1_value2, ...) ... - tb2_name (tb2_field1_name, ...) VALUES (field1_value1, ...) (field1_value2, ...) ...; + INSERT INTO d21001 USING meters (groupId) TAGS (2) VALUES ('2021-07-13 14:06:33.196', 10.15, 217, 0.33); ``` - 同时向表tb1_name和tb2_name中按列分别插入多条记录 - - 注意:允许插入的最老记录的时间戳,是相对于当前服务器时间,减去配置的keep值(数据保留的天数),允许插入的最新记录的时间戳,是相对于当前服务器时间,加上配置的days值(数据文件存储数据的时间跨度,单位为天)。keep和days都是可以在创建数据库时指定的,缺省值分别是3650天和10天。 - -- **插入记录时自动建表** + 自动建表语法也支持在一条语句中向多个表插入记录。例如: ```mysql - INSERT INTO tb_name USING stb_name TAGS (tag_value1, ...) VALUES (field_value1, ...); + INSERT INTO d21001 USING meters TAGS ('Beijing.Chaoyang', 2) VALUES ('2021-07-13 14:06:34.630', 10.2, 219, 0.32) ('2021-07-13 14:06:35.779', 10.15, 217, 0.33) + d21002 USING meters (groupId) TAGS (2) VALUES ('2021-07-13 14:06:34.255', 10.15, 217, 0.33) + d21003 USING meters (groupId) TAGS (2) (ts, current, phase) VALUES ('2021-07-13 14:06:34.255', 10.27, 0.31); ``` - 如果用户在写数据时并不确定某个表是否存在,此时可以在写入数据时使用自动建表语法来创建不存在的表,若该表已存在则不会建立新表。自动建表时,要求必须以超级表为模板,并写明数据表的 tags 取值。 + **说明:**在 2.0.20.5 版本之前,在使用自动建表语法并指定列时,子表的列名必须紧跟在子表名称后面,而不能如例子里那样放在 TAGS 和 VALUES 之间。从 2.0.20.5 版本开始,两种写法都可以,但不能在一条 SQL 语句中混用,否则会报语法错误。 -- **插入记录时自动建表,并指定具体的 tags 列** +- **插入来自文件的数据记录** + 除了使用 VALUES 关键字插入一行或多行数据外,也可以把要写入的数据放在 CSV 文件中(英文逗号分隔、英文单引号括住每个值)供 SQL 指令读取。其中 CSV 文件无需表头。例如,如果 /tmp/csvfile.csv 文件的内容为: + ``` + '2021-07-13 14:07:34.630', '10.2', '219', '0.32' + '2021-07-13 14:07:35.779', '10.15', '217', '0.33' + ``` + 那么通过如下指令可以把这个文件中的数据写入子表中: ```mysql - INSERT INTO tb_name USING stb_name (tag_name1, ...) TAGS (tag_value1, ...) VALUES (field_value1, ...); + INSERT INTO d1001 FILE '/tmp/csvfile.csv'; ``` - 在自动建表时,可以只是指定部分 tags 列的取值,未被指定的 tags 列将取为空值。 **历史记录写入**:可使用IMPORT或者INSERT命令,IMPORT的语法,功能与INSERT完全一样。 -说明:针对 insert 类型的 SQL 语句,我们采用的流式解析策略,在发现后面的错误之前,前面正确的部分SQL仍会执行。下面的sql中,insert语句是无效的,但是d1001仍会被创建。 +**说明:**针对 insert 类型的 SQL 语句,我们采用的是流式解析策略,在发现后面的错误之前,前面正确的部分 SQL 仍会执行。下面的 SQL 中,INSERT 语句是无效的,但是 d1001 仍会被创建。 ```mysql taos> CREATE TABLE meters(ts TIMESTAMP, current FLOAT, voltage INT, phase FLOAT) TAGS(location BINARY(30), groupId INT); Query OK, 0 row(s) affected (0.008245s) @@ -385,9 +452,9 @@ Query OK, 1 row(s) in set (0.001029s) taos> SHOW TABLES; Query OK, 0 row(s) in set (0.000946s) -taos> INSERT INTO d1001 USING meters TAGS('Beijing.Chaoyang', 2); +taos> INSERT INTO d1001 USING meters TAGS('Beijing.Chaoyang', 2) VALUES('a'); -DB error: invalid SQL: keyword VALUES or FILE required +DB error: invalid SQL: 'a' (invalid timestamp) (0.039494s) taos> SHOW TABLES; table_name | created_time | columns | stable_name | Query OK, 1 row(s) in set (0.001091s) @@ -404,21 +471,18 @@ Query OK, 1 row(s) in set (0.001091s) SELECT select_expr [, select_expr ...] 
FROM {tb_name_list} [WHERE where_condition] - [INTERVAL (interval_val [, interval_offset])] - [SLIDING sliding_val] - [FILL fill_val] + [SESSION(ts_col, tol_val)] + [STATE_WINDOW(col)] + [INTERVAL(interval_val [, interval_offset]) [SLIDING sliding_val]] + [FILL(fill_mod_and_val)] [GROUP BY col_list] [ORDER BY col_list { DESC | ASC }] - [SLIMIT limit_val [, SOFFSET offset_val]] - [LIMIT limit_val [, OFFSET offset_val]] + [SLIMIT limit_val [SOFFSET offset_val]] + [LIMIT limit_val [OFFSET offset_val]] [>> export_file]; ``` -#### SELECT子句 - -一个选择子句可以是联合查询(UNION)和另一个查询的子查询(SUBQUERY)。 - -##### 通配符 +#### 通配符 通配符 * 可以用于代指全部列。对于普通表,结果中只有普通列。 ```mysql @@ -453,7 +517,7 @@ Query OK, 9 row(s) in set (0.002022s) SELECT * FROM d1001; SELECT d1001.* FROM d1001; ``` -在Join查询中,带前缀的\*和不带前缀\*返回的结果有差别, \*返回全部表的所有列数据(不包含标签),带前缀的通配符,则只返回该表的列数据。 +在JOIN查询中,带前缀的\*和不带前缀\*返回的结果有差别, \*返回全部表的所有列数据(不包含标签),带前缀的通配符,则只返回该表的列数据。 ```mysql taos> SELECT * FROM d1001, d1003 WHERE d1001.ts=d1003.ts; ts | current | voltage | phase | ts | current | voltage | phase | @@ -469,8 +533,8 @@ taos> SELECT d1001.* FROM d1001,d1003 WHERE d1001.ts = d1003.ts; Query OK, 1 row(s) in set (0.020443s) ``` -在使用SQL函数来进行查询过程中,部分SQL函数支持通配符操作。其中的区别在于: -```count(\*)```函数只返回一列。```first```、```last```、```last_row```函数则是返回全部列。 +在使用SQL函数来进行查询的过程中,部分SQL函数支持通配符操作。其中的区别在于: +```count(*)```函数只返回一列。```first```、```last```、```last_row```函数则是返回全部列。 ```mysql taos> SELECT COUNT(*) FROM d1001; @@ -488,7 +552,7 @@ taos> SELECT FIRST(*) FROM d1001; Query OK, 1 row(s) in set (0.000849s) ``` -##### 标签列 +#### 标签列 从 2.0.14 版本开始,支持在普通表的查询中指定 _标签列_,且标签列的值会与普通列的数据一起返回。 ```mysql @@ -504,12 +568,12 @@ Query OK, 2 row(s) in set (0.003112s) ##### 获取标签列的去重取值 -从 2.0.15 版本开始,支持在超级表查询标签列时,指定 distinct 关键字,这样将返回指定标签列的所有不重复取值。 +从 2.0.15 版本开始,支持在超级表查询标签列时,指定 DISTINCT 关键字,这样将返回指定标签列的所有不重复取值。 ```mysql SELECT DISTINCT tag_name FROM stb_name; ``` -注意:目前 distinct 关键字只支持对超级表的标签列进行去重,而不能用于普通列。 +注意:目前 DISTINCT 关键字只支持对超级表的标签列进行去重,而不能用于普通列。 @@ -544,7 +608,7 @@ SELECT * FROM d1001; #### 特殊功能 -部分特殊的查询功能可以不使用FROM子句执行。获取当前所在的数据库 database() +部分特殊的查询功能可以不使用FROM子句执行。获取当前所在的数据库 database(): ```mysql taos> SELECT DATABASE(); database() | @@ -552,7 +616,7 @@ taos> SELECT DATABASE(); power | Query OK, 1 row(s) in set (0.000079s) ``` -如果登录的时候没有指定默认数据库,且没有使用```use```命令切换数据,则返回NULL。 +如果登录的时候没有指定默认数据库,且没有使用```USE```命令切换数据,则返回NULL。 ```mysql taos> SELECT DATABASE(); database() | @@ -560,7 +624,7 @@ taos> SELECT DATABASE(); NULL | Query OK, 1 row(s) in set (0.000184s) ``` -获取服务器和客户端版本号: +获取服务器和客户端版本号: ```mysql taos> SELECT CLIENT_VERSION(); client_version() | @@ -604,7 +668,7 @@ SELECT TBNAME, location FROM meters; ```mysql SELECT COUNT(TBNAME) FROM meters; ``` -以上两个查询均只支持在Where条件子句中添加针对标签(TAGS)的过滤条件。例如: +以上两个查询均只支持在WHERE条件子句中添加针对标签(TAGS)的过滤条件。例如: ```mysql taos> SELECT TBNAME, location FROM meters; tbname | location | @@ -622,64 +686,94 @@ taos> SELECT COUNT(tbname) FROM meters WHERE groupId > 2; Query OK, 1 row(s) in set (0.001091s) ``` -- 可以使用 * 返回所有列,或指定列名。可以对数字列进行四则运算,可以给输出的列取列名 -- WHERE 语句可以使用各种逻辑判断来过滤数字值,或使用通配符来过滤字符串 +- 可以使用 * 返回所有列,或指定列名。可以对数字列进行四则运算,可以给输出的列取列名。 + * 暂不支持含列名的四则运算表达式用于条件过滤算子(例如,不支持 `where a*2>6;`,但可以写 `where a>6/2;`)。 + * 暂不支持含列名的四则运算表达式作为 SQL 函数的应用对象(例如,不支持 `select min(2*a) from t;`,但可以写 `select 2*min(a) from t;`)。 +- WHERE 语句可以使用各种逻辑判断来过滤数字值,或使用通配符来过滤字符串。 - 输出结果缺省按首列时间戳升序排序,但可以指定按降序排序( _c0 指首列时间戳)。使用 ORDER BY 对其他字段进行排序为非法操作。 -- 参数 LIMIT 控制输出条数,OFFSET 指定从第几条开始输出。LIMIT/OFFSET 对结果集的执行顺序在 ORDER BY 之后。 -- 参数 SLIMIT 控制由 GROUP BY 指令划分的每个分组中的输出条数。 -- 通过”>>"输出结果可以导出到指定文件 +- 参数 LIMIT 控制输出条数,OFFSET 
指定从第几条开始输出。LIMIT/OFFSET 对结果集的执行顺序在 ORDER BY 之后。且 `LIMIT 5 OFFSET 2` 可以简写为 `LIMIT 2, 5`。 + * 在有 GROUP BY 子句的情况下,LIMIT 参数控制的是每个分组中至多允许输出的条数。 +- 参数 SLIMIT 控制由 GROUP BY 指令划分的分组中,至多允许输出几个分组的数据。且 `SLIMIT 5 SOFFSET 2` 可以简写为 `SLIMIT 2, 5`。 +- 通过 “>>” 输出结果可以导出到指定文件。 ### 支持的条件过滤操作 -| Operation | Note | Applicable Data Types | -| ----------- | ----------------------------- | ------------------------------------- | -| > | larger than | **`timestamp`** and all numeric types | -| < | smaller than | **`timestamp`** and all numeric types | -| >= | larger than or equal to | **`timestamp`** and all numeric types | -| <= | smaller than or equal to | **`timestamp`** and all numeric types | -| = | equal to | all types | -| <> | not equal to | all types | -| between and | within a certain range | **`timestamp`** and all numeric types | -| % | match with any char sequences | **`binary`** **`nchar`** | -| _ | match with a single char | **`binary`** **`nchar`** | - -1. 同时进行多个字段的范围过滤,需要使用关键词 AND 来连接不同的查询条件,暂不支持 OR 连接的不同列之间的查询过滤条件。 -2. 针对单一字段的过滤,如果是时间过滤条件,则一条语句中只支持设定一个;但针对其他的(普通)列或标签列,则可以使用 `OR` 关键字进行组合条件的查询过滤。例如:((value > 20 AND value < 30) OR (value < 12)) 。 -3. 从 2.0.17 版本开始,条件过滤开始支持 BETWEEN AND 语法,例如 `WHERE col2 BETWEEN 1.5 AND 3.25` 表示查询条件为“1.5 ≤ col2 ≤ 3.25”。 +| **Operation** | **Note** | **Applicable Data Types** | +| --------------- | ----------------------------- | ----------------------------------------- | +| > | larger than | **`timestamp`** and all numeric types | +| < | smaller than | **`timestamp`** and all numeric types | +| >= | larger than or equal to | **`timestamp`** and all numeric types | +| <= | smaller than or equal to | **`timestamp`** and all numeric types | +| = | equal to | all types | +| <> | not equal to | all types | +| between and | within a certain range | **`timestamp`** and all numeric types | +| in | matches any value in a set | all types except first column `timestamp` | +| % | match with any char sequences | **`binary`** **`nchar`** | +| _ | match with a single char | **`binary`** **`nchar`** | + +1. <> 算子也可以写为 != ,请注意,这个算子不能用于数据表第一列的 timestamp 字段。 +2. 同时进行多个字段的范围过滤,需要使用关键词 AND 来连接不同的查询条件,暂不支持 OR 连接的不同列之间的查询过滤条件。 +3. 针对单一字段的过滤,如果是时间过滤条件,则一条语句中只支持设定一个;但针对其他的(普通)列或标签列,则可以使用 `OR` 关键字进行组合条件的查询过滤。例如: `((value > 20 AND value < 30) OR (value < 12))`。 +4. 从 2.0.17.0 版本开始,条件过滤开始支持 BETWEEN AND 语法,例如 `WHERE col2 BETWEEN 1.5 AND 3.25` 表示查询条件为“1.5 ≤ col2 ≤ 3.25”。 +5. 从 2.1.4.0 版本开始,条件过滤开始支持 IN 算子,例如 `WHERE city IN ('Beijing', 'Shanghai')`。说明:BOOL 类型写作 `{true, false}` 或 `{0, 1}` 均可,但不能写作 0、1 之外的整数;FLOAT 和 DOUBLE 类型会受到浮点数精度影响,集合内的值在精度范围内认为和数据行的值完全相等才能匹配成功;TIMESTAMP 类型支持非主键的列。 + + + + +### UNION ALL 操作符 + +```mysql +SELECT ... +UNION ALL SELECT ... +[UNION ALL SELECT ...] 
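+-- 一个最小示意(假设 tb1 与 tb2 两张表的列名、列类型、列数和顺序完全相同):
+-- SELECT ts, col1 FROM tb1
+-- UNION ALL
+-- SELECT ts, col1 FROM tb2;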
+``` + +TDengine 支持 UNION ALL 操作符。也就是说,如果多个 SELECT 子句返回结果集的结构完全相同(列名、列类型、列数、顺序),那么可以通过 UNION ALL 把这些结果集合并到一起。目前只支持 UNION ALL 模式,也即在结果集的合并过程中是不去重的。 ### SQL 示例 -- 对于下面的例子,表tb1用以下语句创建 +- 对于下面的例子,表tb1用以下语句创建: ```mysql CREATE TABLE tb1 (ts TIMESTAMP, col1 INT, col2 FLOAT, col3 BINARY(50)); ``` -- 查询tb1刚过去的一个小时的所有记录 +- 查询tb1刚过去的一个小时的所有记录: ```mysql SELECT * FROM tb1 WHERE ts >= NOW - 1h; ``` -- 查询表tb1从2018-06-01 08:00:00.000 到2018-06-02 08:00:00.000时间范围,并且col3的字符串是'nny'结尾的记录,结果按照时间戳降序 +- 查询表tb1从2018-06-01 08:00:00.000 到2018-06-02 08:00:00.000时间范围,并且col3的字符串是'nny'结尾的记录,结果按照时间戳降序: ```mysql SELECT * FROM tb1 WHERE ts > '2018-06-01 08:00:00.000' AND ts <= '2018-06-02 08:00:00.000' AND col3 LIKE '%nny' ORDER BY ts DESC; ``` -- 查询col1与col2的和,并取名complex, 时间大于2018-06-01 08:00:00.000, col2大于1.2,结果输出仅仅10条记录,从第5条开始 +- 查询col1与col2的和,并取名complex, 时间大于2018-06-01 08:00:00.000, col2大于1.2,结果输出仅仅10条记录,从第5条开始: ```mysql SELECT (col1 + col2) AS 'complex' FROM tb1 WHERE ts > '2018-06-01 08:00:00.000' AND col2 > 1.2 LIMIT 10 OFFSET 5; ``` -- 查询过去10分钟的记录,col2的值大于3.14,并且将结果输出到文件 `/home/testoutpu.csv`. +- 查询过去10分钟的记录,col2的值大于3.14,并且将结果输出到文件 `/home/testoutpu.csv`: ```mysql SELECT COUNT(*) FROM tb1 WHERE ts >= NOW - 10m AND col2 > 3.14 >> /home/testoutpu.csv; ``` -## SQL 函数 + +## SQL 函数 ### 聚合函数 @@ -695,11 +789,11 @@ TDengine支持针对数据的聚合查询。提供支持的聚合和选择函数 应用字段:应用全部字段。 - 适用于:表、超级表。 + 适用于:**表、超级表**。 说明: - 1)可以使用星号*来替代具体的字段,使用星号(*)返回全部记录数量。 + 1)可以使用星号(\*)来替代具体的字段,使用星号(\*)返回全部记录数量。 2)针对同一表的(不包含NULL值)字段查询结果均相同。 @@ -730,7 +824,7 @@ TDengine支持针对数据的聚合查询。提供支持的聚合和选择函数 应用字段:不能应用在timestamp、binary、nchar、bool字段。 - 适用于:表、超级表。 + 适用于:**表、超级表**。 示例: ```mysql @@ -757,7 +851,23 @@ TDengine支持针对数据的聚合查询。提供支持的聚合和选择函数 应用字段:不能应用在timestamp、binary、nchar、bool类型字段。 - 适用于:表。 + 适用于:**表、(超级表)**。 + + 说明:从 2.1.3.0 版本开始,TWA 函数可以在由 GROUP BY 划分出单独时间线的情况下用于超级表(也即 GROUP BY tbname)。 + +- **IRATE** + ```mysql + SELECT IRATE(field_name) FROM tb_name WHERE clause; + ``` + 功能说明:计算瞬时增长率。使用时间区间中最后两个样本数据来计算瞬时增长速率;如果这两个值呈递减关系,那么只取最后一个数用于计算,而不是使用二者差值。 + + 返回结果数据类型:双精度浮点数Double。 + + 应用字段:不能应用在timestamp、binary、nchar、bool类型字段。 + + 适用于:**表、(超级表)**。 + + 说明:(从 2.1.3.0 版本开始新增此函数)IRATE 可以在由 GROUP BY 划分出单独时间线的情况下用于超级表(也即 GROUP BY tbname)。 - **SUM** ```mysql @@ -769,7 +879,7 @@ TDengine支持针对数据的聚合查询。提供支持的聚合和选择函数 应用字段:不能应用在timestamp、binary、nchar、bool类型字段。 - 适用于:表、超级表。 + 适用于:**表、超级表**。 示例: ```mysql @@ -796,7 +906,7 @@ TDengine支持针对数据的聚合查询。提供支持的聚合和选择函数 应用字段:不能应用在timestamp、binary、nchar、bool类型字段。 - 适用于:表。(从 2.0.15.1 版本开始,本函数也支持超级表) + 适用于:**表**。(从 2.0.15.1 版本开始,本函数也支持**超级表**) 示例: ```mysql @@ -819,7 +929,7 @@ TDengine支持针对数据的聚合查询。提供支持的聚合和选择函数 说明:自变量是时间戳,因变量是该列的值。 - 适用于:表。 + 适用于:**表**。 示例: ```mysql @@ -842,6 +952,8 @@ TDengine支持针对数据的聚合查询。提供支持的聚合和选择函数 应用字段:不能应用在timestamp、binary、nchar、bool类型字段。 + 适用于:**表、超级表**。 + 示例: ```mysql taos> SELECT MIN(current), MIN(voltage) FROM meters; @@ -867,6 +979,8 @@ TDengine支持针对数据的聚合查询。提供支持的聚合和选择函数 应用字段:不能应用在timestamp、binary、nchar、bool类型字段。 + 适用于:**表、超级表**。 + 示例: ```mysql taos> SELECT MAX(current), MAX(voltage) FROM meters; @@ -892,6 +1006,8 @@ TDengine支持针对数据的聚合查询。提供支持的聚合和选择函数 应用字段:所有字段。 + 适用于:**表、超级表**。 + 说明: 1)如果要返回各个列的首个(时间戳最小)非NULL值,可以使用FIRST(\*); @@ -925,6 +1041,8 @@ TDengine支持针对数据的聚合查询。提供支持的聚合和选择函数 应用字段:所有字段。 + 适用于:**表、超级表**。 + 说明: 1)如果要返回各个列的最后(时间戳最大)一个非NULL值,可以使用LAST(\*); @@ -950,17 +1068,21 @@ TDengine支持针对数据的聚合查询。提供支持的聚合和选择函数 ```mysql SELECT TOP(field_name, K) FROM { tb_name | stb_name } [WHERE clause]; ``` - 功能说明: 统计表/超级表中某列的值最大*k*个非NULL值。若多于k个列值并列最大,则返回时间戳小的。 + 功能说明: 统计表/超级表中某列的值最大 *k* 个非 NULL 值。如果多条数据取值一样,全部取用又会超出 k 条限制时,系统会从相同值中随机选取符合要求的数量返回。 返回结果数据类型:同应用的字段。 
应用字段:不能应用在timestamp、binary、nchar、bool类型字段。 + 适用于:**表、超级表**。 + 说明: 1)*k*值取值范围1≤*k*≤100; - 2)系统同时返回该记录关联的时间戳列。 + 2)系统同时返回该记录关联的时间戳列; + + 3)限制:TOP函数不支持FILL子句。 示例: ```mysql @@ -984,17 +1106,21 @@ TDengine支持针对数据的聚合查询。提供支持的聚合和选择函数 ```mysql SELECT BOTTOM(field_name, K) FROM { tb_name | stb_name } [WHERE clause]; ``` - 功能说明:统计表/超级表中某列的值最小*k*个非NULL值。若多于k个列值并列最小,则返回时间戳小的。 + 功能说明:统计表/超级表中某列的值最小 *k* 个非 NULL 值。如果多条数据取值一样,全部取用又会超出 k 条限制时,系统会从相同值中随机选取符合要求的数量返回。 返回结果数据类型:同应用的字段。 应用字段:不能应用在timestamp、binary、nchar、bool类型字段。 + 适用于:**表、超级表**。 + 说明: 1)*k*值取值范围1≤*k*≤100; - 2)系统同时返回该记录关联的时间戳列。 + 2)系统同时返回该记录关联的时间戳列; + + 3)限制:BOTTOM函数不支持FILL子句。 示例: ```mysql @@ -1023,6 +1149,8 @@ TDengine支持针对数据的聚合查询。提供支持的聚合和选择函数 应用字段:不能应用在timestamp、binary、nchar、bool类型字段。 + 适用于:**表**。 + 说明:*P*值取值范围0≤*P*≤100,为0的时候等同于MIN,为100的时候等同于MAX。 示例: @@ -1038,12 +1166,14 @@ TDengine支持针对数据的聚合查询。提供支持的聚合和选择函数 ```mysql SELECT APERCENTILE(field_name, P) FROM { tb_name | stb_name } [WHERE clause]; ``` - 功能说明:统计表中某列的值百分比分位数,与PERCENTILE函数相似,但是返回近似结果。 + 功能说明:统计表/超级表中某列的值百分比分位数,与PERCENTILE函数相似,但是返回近似结果。 返回结果数据类型: 双精度浮点数Double。 应用字段:不能应用在timestamp、binary、nchar、bool类型字段。 + 适用于:**表、超级表**。 + 说明:*P*值取值范围0≤*P*≤100,为0的时候等同于MIN,为100的时候等同于MAX。推荐使用```APERCENTILE```函数,该函数性能远胜于```PERCENTILE```函数 ```mysql @@ -1058,13 +1188,17 @@ TDengine支持针对数据的聚合查询。提供支持的聚合和选择函数 ```mysql SELECT LAST_ROW(field_name) FROM { tb_name | stb_name }; ``` - 功能说明:返回表(超级表)的最后一条记录。 + 功能说明:返回表/超级表的最后一条记录。 返回结果数据类型:同应用的字段。 应用字段:所有字段。 - 说明:与last函数不同,last_row不支持时间范围限制,强制返回最后一条记录。 + 适用于:**表、超级表**。 + + 说明:与LAST函数不同,LAST_ROW不支持时间范围限制,强制返回最后一条记录。 + + 限制:LAST_ROW()不能与INTERVAL一起使用。 示例: ```mysql @@ -1082,17 +1216,20 @@ TDengine支持针对数据的聚合查询。提供支持的聚合和选择函数 ``` ### 计算函数 + - **DIFF** ```mysql SELECT DIFF(field_name) FROM tb_name [WHERE clause]; ``` 功能说明:统计表中某列的值与前一行对应值的差。 - 返回结果数据类型: 同应用字段。 + 返回结果数据类型:同应用字段。 应用字段:不能应用在timestamp、binary、nchar、bool类型字段。 - 说明:输出结果行数是范围内总行数减一,第一行没有结果输出。 + 适用于:**表、(超级表)**。 + + 说明:输出结果行数是范围内总行数减一,第一行没有结果输出。从 2.1.3.0 版本开始,DIFF 函数可以在由 GROUP BY 划分出单独时间线的情况下用于超级表(也即 GROUP BY tbname)。 示例: ```mysql @@ -1104,16 +1241,32 @@ TDengine支持针对数据的聚合查询。提供支持的聚合和选择函数 Query OK, 2 row(s) in set (0.001162s) ``` +- **DERIVATIVE** + ```mysql + SELECT DERIVATIVE(field_name, time_interval, ignore_negative) FROM tb_name [WHERE clause]; + ``` + 功能说明:统计表中某列数值的单位变化率。其中单位时间区间的长度可以通过 time_interval 参数指定,最小可以是 1 秒(1s);ignore_negative 参数的值可以是 0 或 1,为 1 时表示忽略负值。 + + 返回结果数据类型:双精度浮点数。 + + 应用字段:不能应用在 timestamp、binary、nchar、bool 类型字段。 + + 适用于:**表、(超级表)**。 + + 说明:(从 2.1.3.0 版本开始新增此函数)输出结果行数是范围内总行数减一,第一行没有结果输出。DERIVATIVE 函数可以在由 GROUP BY 划分出单独时间线的情况下用于超级表(也即 GROUP BY tbname)。 + - **SPREAD** ```mysql SELECT SPREAD(field_name) FROM { tb_name | stb_name } [WHERE clause]; ``` 功能说明:统计表/超级表中某列的最大值和最小值之差。 - 返回结果数据类型: 双精度浮点数。 + 返回结果数据类型:双精度浮点数。 应用字段:不能应用在binary、nchar、bool类型字段。 + 适用于:**表、超级表**。 + 说明:可用于TIMESTAMP字段,此时表示记录的时间覆盖范围。 示例: @@ -1142,6 +1295,8 @@ TDengine支持针对数据的聚合查询。提供支持的聚合和选择函数 应用字段:不能应用在timestamp、binary、nchar、bool类型字段。 + 适用于:**表、超级表**。 + 说明: 1)支持两列或多列之间进行计算,可使用括号控制计算优先级; @@ -1158,49 +1313,57 @@ TDengine支持针对数据的聚合查询。提供支持的聚合和选择函数 Query OK, 3 row(s) in set (0.001046s) ``` -## 时间维度聚合 +## 按窗口切分聚合 -TDengine支持按时间段进行聚合,可以将表中数据按照时间段进行切割后聚合生成结果,比如温度传感器每秒采集一次数据,但需查询每隔10分钟的温度平均值。这个聚合适合于降维(down sample)操作, 语法如下: +TDengine 支持按时间段等窗口切分方式进行聚合结果查询,比如温度传感器每秒采集一次数据,但需查询每隔 10 分钟的温度平均值。这类聚合适合于降维(down sample)操作,语法如下: ```mysql SELECT function_list FROM tb_name [WHERE where_condition] - INTERVAL (interval [, offset]) - [SLIDING sliding] - [FILL ({NONE | VALUE | PREV | NULL | LINEAR})] + [SESSION(ts_col, tol_val)] + [STATE_WINDOW(col)] + [INTERVAL(interval 
[, offset]) [SLIDING sliding]] + [FILL({NONE | VALUE | PREV | NULL | LINEAR | NEXT})] SELECT function_list FROM stb_name [WHERE where_condition] - INTERVAL (interval [, offset]) - [SLIDING sliding] - [FILL ({ VALUE | PREV | NULL | LINEAR})] + [SESSION(ts_col, tol_val)] + [STATE_WINDOW(col)] + [INTERVAL(interval [, offset]) [SLIDING sliding]] + [FILL({NONE | VALUE | PREV | NULL | LINEAR | NEXT})] [GROUP BY tags] ``` -- 聚合时间段的长度由关键词INTERVAL指定,最短时间间隔10毫秒(10a),并且支持偏移(偏移必须小于间隔)。聚合查询中,能够同时执行的聚合和选择函数仅限于单个输出的函数:count、avg、sum 、stddev、leastsquares、percentile、min、max、first、last,不能使用具有多行输出结果的函数(例如:top、bottom、diff以及四则运算)。 -- WHERE语句可以指定查询的起止时间和其他过滤条件 -- SLIDING语句用于指定聚合时间段的前向增量 -- FILL语句指定某一时间区间数据缺失的情况下的填充模式。填充模式包括以下几种: - * 不进行填充:NONE(默认填充模式)。 - * VALUE填充:固定值填充,此时需要指定填充的数值。例如:fill(value, 1.23)。 - * NULL填充:使用NULL填充数据。例如:fill(null)。 - * PREV填充:使用前一个非NULL值填充数据。例如:fill(prev)。 +- 在聚合查询中,function_list 位置允许使用聚合和选择函数,并要求每个函数仅输出单个结果(例如:COUNT、AVG、SUM、STDDEV、LEASTSQUARES、PERCENTILE、MIN、MAX、FIRST、LAST),而不能使用具有多行输出结果的函数(例如:TOP、BOTTOM、DIFF 以及四则运算)。 +- 查询过滤、聚合等操作按照每个切分窗口为独立的单位执行。聚合查询目前支持三种窗口的划分方式: + 1. 时间窗口:聚合时间段的窗口宽度由关键词 INTERVAL 指定,最短时间间隔 10 毫秒(10a);并且支持偏移 offset(偏移必须小于间隔),也即时间窗口划分与“UTC 时刻 0”相比的偏移量。SLIDING 语句用于指定聚合时间段的前向增量,也即每次窗口向前滑动的时长。当 SLIDING 与 INTERVAL 取值相等的时候,滑动窗口即为翻转窗口。 + * 从 2.1.5.0 版本开始,INTERVAL 语句允许的最短时间间隔调整为 1 微秒(1u),当然如果所查询的 DATABASE 的时间精度设置为毫秒级,那么允许的最短时间间隔为 1 毫秒(1a)。 + * **注意:**用到 INTERVAL 语句时,除非极特殊的情况,都要求把客户端和服务端的 taos.cfg 配置文件中的 timezone 参数配置为相同的取值,以避免时间处理函数频繁进行跨时区转换而导致的严重性能影响。 + 2. 状态窗口:使用整数或布尔值来标识产生记录时设备的状态量,产生的记录如果具有相同的状态量取值则归属于同一个状态窗口,数值改变后该窗口关闭。状态量所对应的列作为 STATE_WINDOW 语句的参数来指定。 + 3. 会话窗口:时间戳所在的列由 SESSION 语句的 ts_col 参数指定,会话窗口根据相邻两条记录的时间戳差值来确定是否属于同一个会话——如果时间戳差异在 tol_val 以内,则认为记录仍属于同一个窗口;如果时间变化超过 tol_val,则自动开启下一个窗口。 +- WHERE 语句可以指定查询的起止时间和其他过滤条件。 +- FILL 语句指定某一窗口区间数据缺失的情况下的填充模式。填充模式包括以下几种: + 1. 不进行填充:NONE(默认填充模式)。 + 2. VALUE 填充:固定值填充,此时需要指定填充的数值。例如:FILL(VALUE, 1.23)。 + 3. PREV 填充:使用前一个非 NULL 值填充数据。例如:FILL(PREV)。 + 4. NULL 填充:使用 NULL 填充数据。例如:FILL(NULL)。 + 5. LINEAR 填充:根据前后距离最近的非 NULL 值做线性插值填充。例如:FILL(LINEAR)。 + 6. NEXT 填充:使用下一个非 NULL 值填充数据。例如:FILL(NEXT)。 说明: - 1. 使用FILL语句的时候可能生成大量的填充输出,务必指定查询的时间区间。针对每次查询,系统可返回不超过1千万条具有插值的结果。 + 1. 使用 FILL 语句的时候可能生成大量的填充输出,务必指定查询的时间区间。针对每次查询,系统可返回不超过 1 千万条具有插值的结果。 2. 在时间维度聚合中,返回的结果中时间序列严格单调递增。 - 3. 如果查询对象是超级表,则聚合函数会作用于该超级表下满足值过滤条件的所有表的数据。如果查询中没有使用group by语句,则返回的结果按照时间序列严格单调递增;如果查询中使用了group by语句分组,则返回结果中每个group内不按照时间序列严格单调递增。 + 3. 
如果查询对象是超级表,则聚合函数会作用于该超级表下满足值过滤条件的所有表的数据。如果查询中没有使用 GROUP BY 语句,则返回的结果按照时间序列严格单调递增;如果查询中使用了 GROUP BY 语句分组,则返回结果中每个 GROUP 内不按照时间序列严格单调递增。 时间聚合也常被用于连续查询场景,可以参考文档 [连续查询(Continuous Query)](https://www.taosdata.com/cn/documentation/advanced-features#continuous-query)。 -**示例:** 智能电表的建表语句如下: +**示例**: 智能电表的建表语句如下: ```mysql CREATE TABLE meters (ts TIMESTAMP, current FLOAT, voltage INT, phase FLOAT) TAGS (location BINARY(64), groupId INT); ``` -针对智能电表采集的数据,以10分钟为一个阶段,计算过去24小时的电流数据的平均值、最大值、电流的中位数、以及随着时间变化的电流走势拟合直线。如果没有计算值,用前一个非NULL值填充。 -使用的查询语句如下: +针对智能电表采集的数据,以 10 分钟为一个阶段,计算过去 24 小时的电流数据的平均值、最大值、电流的中位数、以及随着时间变化的电流走势拟合直线。如果没有计算值,用前一个非 NULL 值填充。使用的查询语句如下: ```mysql SELECT AVG(current), MAX(current), LEASTSQUARES(current, start_val, step_val), PERCENTILE(current, 50) FROM meters @@ -1214,21 +1377,22 @@ SELECT AVG(current), MAX(current), LEASTSQUARES(current, start_val, step_val), P - 数据库名最大长度为 32 - 表名最大长度为 192,每行数据最大长度 16k 个字符(注意:数据行内每个 BINARY/NCHAR 类型的列还会额外占用 2 个字节的存储位置) - 列名最大长度为 64,最多允许 1024 列,最少需要 2 列,第一列必须是时间戳 -- 标签最多允许 128 个,可以 1 个,标签总长度不超过 16k 个字符 +- 标签名最大长度为 64,最多允许 128 个,可以 1 个,一个表中标签值的总长度不超过 16k 个字符 - SQL 语句最大长度 65480 个字符,但可通过系统配置参数 maxSQLLength 修改,最长可配置为 1M +- SELECT 语句的查询结果,最多允许返回 1024 列(语句中的函数调用可能也会占用一些列空间),超限时需要显式指定较少的返回数据列,以避免语句执行报错。 - 库的数目,超级表的数目、表的数目,系统不做限制,仅受系统资源限制 ## TAOS SQL其他约定 -**group by的限制** +**GROUP BY的限制** -TAOS SQL支持对标签、tbname进行group by操作,也支持普通列进行group by,前提是:仅限一列且该列的唯一值小于10万个。 +TAOS SQL支持对标签、TBNAME进行GROUP BY操作,也支持普通列进行GROUP BY,前提是:仅限一列且该列的唯一值小于10万个。 -**join操作的限制** +**JOIN操作的限制** TAOS SQL支持表之间按主键时间戳来join两张表的列,暂不支持两个表之间聚合后的四则运算。 -**is not null与不为空的表达式适用范围** +**IS NOT NULL与不为空的表达式适用范围** -is not null支持所有类型的列。不为空的表达式为 <>"",仅对非数值类型的列适用。 +IS NOT NULL支持所有类型的列。不为空的表达式为 <>"",仅对非数值类型的列适用。 diff --git a/documentation20/cn/13.faq/docs.md b/documentation20/cn/13.faq/docs.md index c01247d345906bde021d88841d34f667c8991fa9..300ff27fe457fe50c78a4b5090ec20ea8edd8957 100644 --- a/documentation20/cn/13.faq/docs.md +++ b/documentation20/cn/13.faq/docs.md @@ -26,17 +26,17 @@ ## 2. Windows平台下JDBCDriver找不到动态链接库,怎么办? -请看为此问题撰写的[技术博客](https://www.taosdata.com/blog/2019/12/03/jdbcdriver找不到动态链接库/) +请看为此问题撰写的[技术博客](https://www.taosdata.com/blog/2019/12/03/950.html) ## 3. 创建数据表时提示more dnodes are needed -请看为此问题撰写的[技术博客](https://www.taosdata.com/blog/2019/12/03/创建数据表时提示more-dnodes-are-needed/) +请看为此问题撰写的[技术博客](https://www.taosdata.com/blog/2019/12/03/965.html) ## 4. 如何让TDengine crash时生成core文件? -请看为此问题撰写的[技术博客](https://www.taosdata.com/blog/2019/12/06/tdengine-crash时生成core文件的方法/) +请看为此问题撰写的[技术博客](https://www.taosdata.com/blog/2019/12/06/974.html) -## 5. 遇到错误"Unable to establish connection", 我怎么办? +## 5. 遇到错误“Unable to establish connection”, 我怎么办? 客户端遇到连接故障,请按照下面的步骤进行检查: @@ -51,13 +51,13 @@ 4. 确认客户端连接时指定了正确的服务器FQDN (Fully Qualified Domain Name(可在服务器上执行Linux命令hostname -f获得)),FQDN配置参考:[一篇文章说清楚TDengine的FQDN](https://www.taosdata.com/blog/2020/09/11/1824.html)。 -5. ping服务器FQDN,如果没有反应,请检查你的网络,DNS设置,或客户端所在计算机的系统hosts文件 +5. ping服务器FQDN,如果没有反应,请检查你的网络,DNS设置,或客户端所在计算机的系统hosts文件。如果部署的是TDengine集群,客户端需要能ping通所有集群节点的FQDN。 6. 检查防火墙设置(Ubuntu 使用 ufw status,CentOS 使用 firewall-cmd --list-port),确认TCP/UDP 端口6030-6042 是打开的 7. 对于Linux上的JDBC(ODBC, Python, Go等接口类似)连接, 确保*libtaos.so*在目录*/usr/local/taos/driver*里, 并且*/usr/local/taos/driver*在系统库函数搜索路径*LD_LIBRARY_PATH*里 -8. 对于windows上的JDBC, ODBC, Python, Go等连接,确保*C:\TDengine\driver\taos.dll*在你的系统库函数搜索目录里 (建议*taos.dll*放在目录 *C:\Windows\System32*) +8. 对于Windows上的JDBC, ODBC, Python, Go等连接,确保*C:\TDengine\driver\taos.dll*在你的系统库函数搜索目录里 (建议*taos.dll*放在目录 *C:\Windows\System32*) 9. 
如果仍不能排除连接故障 @@ -70,7 +70,8 @@ 10. 也可以使用taos程序内嵌的网络连通检测功能,来验证服务器和客户端之间指定的端口连接是否通畅(包括TCP和UDP):[TDengine 内嵌网络检测工具使用指南](https://www.taosdata.com/blog/2020/09/08/1816.html)。 -## 6. 遇到错误“Unexpected generic error in RPC”或者"TDengine Error: Unable to resolve FQDN", 我怎么办? +## 6. 遇到错误“Unexpected generic error in RPC”或者“Unable to resolve FQDN”,我怎么办? + 产生这个错误,是由于客户端或数据节点无法解析FQDN(Fully Qualified Domain Name)导致。对于TAOS Shell或客户端应用,请做如下检查: 1. 请检查连接的服务器的FQDN是否正确,FQDN配置参考:[一篇文章说清楚TDengine的FQDN](https://www.taosdata.com/blog/2020/09/11/1824.html)。 @@ -86,6 +87,7 @@ TDengine还没有一组专用的validation queries。然而建议你使用系统监测的数据库”log"来做。 + ## 9. 我可以删除或更新一条记录吗? TDengine 目前尚不支持删除功能,未来根据用户需求可能会支持。 @@ -102,7 +104,7 @@ TDengine 目前尚不支持删除功能,未来根据用户需求可能会支 批量插入。每条写入语句可以一张表同时插入多条记录,也可以同时插入多张表的多条记录。 -## 12. 最有效的写入数据的方法是什么?windows系统下插入的nchar类数据中的汉字被解析成了乱码如何解决? +## 12. Windows系统下插入的nchar类数据中的汉字被解析成了乱码如何解决? Windows下插入nchar类的数据中如果有中文,请先确认系统的地区设置成了中国(在Control Panel里可以设置),这时cmd中的`taos`客户端应该已经可以正常工作了;如果是在IDE里开发Java应用,比如Eclipse, Intellij,请确认IDE里的文件编码为GBK(这是Java默认的编码类型),然后在生成Connection时,初始化客户端的配置,具体语句如下: ```JAVA @@ -115,15 +117,15 @@ Connection = DriverManager.getConnection(url, properties); ## 13.JDBC报错: the excuted SQL is not a DML or a DDL? 请更新至最新的JDBC驱动 -```JAVA +```xml com.taosdata.jdbc taos-jdbcdriver - 2.0.4 + 2.0.27 ``` -## 14. taos connect failed, reason: invalid timestamp +## 14. taos connect failed, reason: invalid timestamp 常见原因是服务器和客户端时间没有校准,可以通过和时间服务器同步的方式(Linux 下使用 ntpdate 命令,Windows 在系统时间设置中选择自动同步)校准。 @@ -157,7 +159,8 @@ ALTER LOCAL RESETLOG; 其含义是,清空本机所有由客户端生成的日志文件。 -## 18. 时间戳的时区信息是怎样处理的? + +## 18. 时间戳的时区信息是怎样处理的? TDengine 中时间戳的时区总是由客户端进行处理,而与服务端无关。具体来说,客户端会对 SQL 语句中的时间戳进行时区转换,转为 UTC 时区(即 Unix 时间戳——Unix Timestamp)再交由服务端进行写入和查询;在读取数据时,服务端也是采用 UTC 时区提供原始数据,客户端收到后再根据本地设置,把时间戳转换为本地系统所要求的时区进行显示。 @@ -166,3 +169,19 @@ TDengine 中时间戳的时区总是由客户端进行处理,而与服务端 2. 如果在 taos.cfg 中设置了 timezone 参数,则客户端会以这个配置文件中的设置为准。 3. 如果在 C/C++/Java/Python 等各种编程语言的 Connector Driver 中,在建立数据库连接时显式指定了 timezone,那么会以这个指定的时区设置为准。例如 Java Connector 的 JDBC URL 中就有 timezone 参数。 4. 在书写 SQL 语句时,也可以直接使用 Unix 时间戳(例如 `1554984068000`)或带有时区的时间戳字符串,也即以 RFC 3339 格式(例如 `2013-04-12T15:52:01.123+08:00`)或 ISO-8601 格式(例如 `2013-04-12T15:52:01.123+0800`)来书写时间戳,此时这些时间戳的取值将不再受其他时区设置的影响。 + + +## 19. TDengine 都会用到哪些网络端口? + +在 TDengine 2.0 版本中,会用到以下这些网络端口(以默认端口 6030 为前提进行说明,如果修改了配置文件中的设置,那么这里列举的端口都会出现变化),管理员可以参考这里的信息调整防火墙设置: + +| 协议 | 默认端口 | 用途说明 | 修改方法 | +| :--- | :-------- | :---------------------------------- | :------------------------------- | +| TCP | 6030 | 客户端与服务端之间通讯。 | 由配置文件设置 serverPort 决定。 | +| TCP | 6035 | 多节点集群的节点间通讯。 | 随 serverPort 端口变化。 | +| TCP | 6040 | 多节点集群的节点间数据同步。 | 随 serverPort 端口变化。 | +| TCP | 6041 | 客户端与服务端之间的 RESTful 通讯。 | 随 serverPort 端口变化。 | +| TCP | 6042 | Arbitrator 的服务端口。 | 因 Arbitrator 启动参数设置变化。 | +| TCP | 6060 | 企业版内 Monitor 服务的网络端口。 | | +| UDP | 6030-6034 | 客户端与服务端之间通讯。 | 随 serverPort 端口变化。 | +| UDP | 6035-6039 | 多节点集群的节点间通讯。 | 随 serverPort 端口变化。 | diff --git a/documentation20/en/00.index/docs.md b/documentation20/en/00.index/docs.md new file mode 100644 index 0000000000000000000000000000000000000000..a10c22ee622fe71f4215c981774b637fc7c177d9 --- /dev/null +++ b/documentation20/en/00.index/docs.md @@ -0,0 +1,142 @@ +# TDengine Documentation + +TDengine is a highly efficient platform to store, query, and analyze time-series data. It is specially designed and optimized for IoT, Internet of Vehicles, Industrial IoT, IT Infrastructure and Application Monitoring, etc. 
It works like a relational database, such as MySQL, but you are strongly encouraged to read through the following documentation before you experience it, especially the Data Model and Data Modeling sections. In addition to this document, you should also download and read our technology white paper. For the older TDengine version 1.6 documentation, please click here. + +## [TDengine Introduction](/evaluation) + +* [TDengine Introduction and Features](/evaluation#intro) +* [TDengine Use Scenes](/evaluation#scenes) +* [TDengine Performance Metrics and Verification]((/evaluation#)) + +## [Getting Started](/getting-started) + +* [Quickly Install](/getting-started#install): install via source code/package / Docker within seconds + +- [Easy to Launch](/getting-started#start): start / stop TDengine with systemctl +- [Command-line](/getting-started#console) : an easy way to access TDengine server +- [Experience Lightning Speed](/getting-started#demo): running a demo, inserting/querying data to experience faster speed +- [List of Supported Platforms](/getting-started#platforms): a list of platforms supported by TDengine server and client +- [Deploy to Kubernetes](https://taosdata.github.io/TDengine-Operator/en/index.html):a detailed guide for TDengine deployment in Kubernetes environment + +## [Overall Architecture](/architecture) + +- [Data Model](/architecture#model): relational database model, but one table for one device with static tags +- [Cluster and Primary Logical Unit](/architecture#cluster): Take advantage of NoSQL, support scale-out and high-reliability +- [Storage Model and Data Partitioning/Sharding](/architecture#sharding): tag data will be separated from time-series data, segmented by vnode and time +- [Data Writing and Replication Process](/architecture#replication): records received are written to WAL, cached, with acknowledgement is sent back to client, while supporting multi-replicas +- [Caching and Persistence](/architecture#persistence): latest records are cached in memory, but are written in columnar format with an ultra-high compression ratio +- [Data Query](/architecture#query): support various functions, time-axis aggregation, interpolation, and multi-table aggregation + +## [Data Modeling](/model) + +- [Create a Database](/model#create-db): create a database for all data collection points with similar features +- [Create a Super Table(STable)](/model#create-stable): create a STable for all data collection points with the same type +- [Create a Table](/model#create-table): use STable as the template, to create a table for each data collecting point + +## [TAOS SQL](/taos-sql) + +- [Data Types](/taos-sql#data-type): support timestamp, int, float, nchar, bool, and other types +- [Database Management](/taos-sql#management): add, drop, check databases +- [Table Management](/taos-sql#table): add, drop, check, alter tables +- [STable Management](/taos-sql#super-table): add, drop, check, alter STables +- [Tag Management](/taos-sql#tags): add, drop, alter tags +- [Inserting Records](/taos-sql#insert): support to write single/multiple items per table, multiple items across tables, and support to write historical data +- [Data Query](/taos-sql#select): support time segment, value filtering, sorting, manual paging of query results, etc +- [SQL Function](/taos-sql#functions): support various aggregation functions, selection functions, and calculation functions, such as avg, min, diff, etc +- [Time Dimensions Aggregation](/taos-sql#aggregation): aggregate and reduce the dimension after 
cutting table data by time segment +- [Boundary Restrictions](/taos-sql#limitation): restrictions for the library, table, SQL, and others +- [Error Code](/taos-sql/error-code): TDengine 2.0 error codes and corresponding decimal codes + +## [Efficient Data Ingestion](/insert) + +- [SQL Ingestion](/insert#sql): write one or multiple records into one or multiple tables via SQL insert command +- [Prometheus Ingestion](/insert#prometheus): Configure Prometheus to write data directly without any code +- [Telegraf Ingestion](/insert#telegraf): Configure Telegraf to write collected data directly without any code +- [EMQ X Broker](/insert#emq): Configure EMQ X to write MQTT data directly without any code +- [HiveMQ Broker](/insert#hivemq): Configure HiveMQ to write MQTT data directly without any code + +## [Efficient Data Querying](/queries) + +- [Main Query Features](/queries#queries): support various standard functions, setting filter conditions, and querying per time segment +- [Multi-table Aggregation Query](/queries#aggregation): use STable and set tag filter conditions to perform efficient aggregation queries +- [Downsampling to Query Value](/queries#sampling): aggregate data in successive time windows, support interpolation + +## [Advanced Features](/advanced-features) + +- [Continuous Query](/advanced-features#continuous-query): Based on sliding windows, the data stream is automatically queried and calculated at regular intervals +- [Data Publisher/Subscriber](/advanced-features#subscribe): subscribe to the newly arrived data like a typical messaging system +- [Cache](/advanced-features#cache): the newly arrived data of each device/table will always be cached +- [Alarm Monitoring](/advanced-features#alert): automatically monitor out-of-threshold data, and actively push it based-on configuration rules + +## [Connector](/connector) + +- [C/C++ Connector](/connector#c-cpp): primary method to connect to TDengine server through libtaos client library +- [Java Connector(JDBC)]: driver for connecting to the server from Java applications using the JDBC API +- [Python Connector](/connector#python): driver for connecting to TDengine server from Python applications +- [RESTful Connector](/connector#restful): a simple way to interact with TDengine via HTTP +- [Go Connector](/connector#go): driver for connecting to TDengine server from Go applications +- [Node.js Connector](/connector#nodejs): driver for connecting to TDengine server from Node.js applications +- [C# Connector](/connector#csharp): driver for connecting to TDengine server from C# applications +- [Windows Client](https://www.taosdata.com/blog/2019/07/26/514.html): compile your own Windows client, which is required by various connectors on the Windows environment + +## [Connections with Other Tools](/connections) + +- [Grafana](/connections#grafana): query the data saved in TDengine and provide visualization +- [MATLAB](/connections#matlab): access data stored in TDengine server via JDBC configured within MATLAB +- [R](/connections#r): access data stored in TDengine server via JDBC configured within R +- [IDEA Database](https://www.taosdata.com/blog/2020/08/27/1767.html): use TDengine visually through IDEA Database Management Tool + +## [Installation and Management of TDengine Cluster](/cluster) + +- [Preparation](/cluster#prepare): important considerations before deploying TDengine for production usage +- [Create Your First Node](/cluster#node-one): simple to follow the quick setup +- [Create Subsequent Nodes](/cluster#node-other): 
configure taos.cfg for new nodes to add more to the existing cluster +- [Node Management](/cluster#management): add, delete, and check nodes in the cluster +- [High-availability of Vnode](/cluster#high-availability): implement high-availability of Vnode through multi-replicas +- [Mnode Management](/cluster#mnode): automatic system creation without any manual intervention +- [Load Balancing](/cluster#load-balancing): automatically performed once the number of nodes or load changes +- [Offline Node Processing](/cluster#offline): any node that offline for more than a certain period will be removed from the cluster +- [Arbitrator](/cluster#arbitrator): used in the case of an even number of replicas to prevent split-brain + +## [TDengine Operation and Maintenance](/administrator) + +- [Capacity Planning](/administrator#planning): Estimating hardware resources based on scenarios +- [Fault Tolerance and Disaster Recovery](/administrator#tolerance): set the correct WAL and number of data replicas +- [System Configuration](/administrator#config): port, cache size, file block size, and other system configurations +- [User Management](/administrator#user): add/delete TDengine users, modify user password +- [Import Data](/administrator#import): import data into TDengine from either script or CSV file +- [Export Data](/administrator#export): export data either from TDengine shell or from the taosdump tool +- [System Monitor](/administrator#status): monitor the system connections, queries, streaming calculation, logs, and events +- [File Directory Structure](/administrator#directories): directories where TDengine data files and configuration files located +- [Parameter Restrictions and Reserved Keywords](/administrator#keywords): TDengine’s list of parameter restrictions and reserved keywords + +## TDengine Technical Design + +- [System Module]: taosd functions and modules partitioning +- [Data Replication]: support real-time synchronous/asynchronous replication, to ensure high-availability of the system +- [Technical Blog](https://www.taosdata.com/cn/blog/?categories=3): More technical analysis and architecture design articles + +## Common Tools + +- [TDengine sample import tools](https://www.taosdata.com/blog/2020/01/18/1166.html) +- [TDengine performance comparison test tools](https://www.taosdata.com/blog/2020/01/18/1166.html) +- [Use TDengine visually through IDEA Database Management Tool](https://www.taosdata.com/blog/2020/08/27/1767.html) + +## Performance: TDengine vs Others + +- [Performance: TDengine vs InfluxDB with InfluxDB’s open-source performance testing tool](https://www.taosdata.com/blog/2020/01/13/1105.html) +- [Performance: TDengine vs OpenTSDB](https://www.taosdata.com/blog/2019/08/21/621.html) +- [Performance: TDengine vs Cassandra](https://www.taosdata.com/blog/2019/08/14/573.html) +- [Performance: TDengine vs InfluxDB](https://www.taosdata.com/blog/2019/07/19/419.html) +- [Performance Test Reports of TDengine vs InfluxDB/OpenTSDB/Cassandra/MySQL/ClickHouse](https://www.taosdata.com/downloads/TDengine_Testing_Report_cn.pdf) + +## More on IoT Big Data + +- [Characteristics of IoT and Industry Internet Big Data](https://www.taosdata.com/blog/2019/07/09/characteristics-of-iot-big-data/) +- [Features and Functions of IoT Big Data platforms](https://www.taosdata.com/blog/2019/07/29/542.html) +- [Why don’t General Big Data Platforms Fit IoT Scenarios?](https://www.taosdata.com/blog/2019/07/09/why-does-the-general-big-data-platform-not-fit-iot-data-processing/) +- [Why TDengine is the 
best choice for IoT, Internet of Vehicles, and Industry Internet Big Data platforms?](https://www.taosdata.com/blog/2019/07/09/why-tdengine-is-the-best-choice-for-iot-big-data-processing/) + +## FAQ + +- [FAQ: Common questions and answers](/faq) diff --git a/documentation20/en/01.evaluation/docs.md b/documentation20/en/01.evaluation/docs.md new file mode 100644 index 0000000000000000000000000000000000000000..250f465d7b1280a78e18250f95aefaeca0c95415 --- /dev/null +++ b/documentation20/en/01.evaluation/docs.md @@ -0,0 +1,65 @@ +# TDengine Introduction + +## About TDengine + +TDengine is an innovative Big Data processing product launched by Taos Data in the face of the fast-growing Internet of Things (IoT) Big Data market and technical challenges. It does not rely on any third-party software, nor does it optimize or package any open-source database or stream computing product. Instead, it is a product independently developed after absorbing the advantages of many traditional relational databases, NoSQL databases, stream computing engines, message queues, and other software. TDengine has its own unique Big Data processing advantages in time-series space. + +One of the modules of TDengine is the time-series database. However, in addition to this, to reduce the complexity of research and development and the difficulty of system operation, TDengine also provides functions such as caching, message queuing, subscription, stream computing, etc. TDengine provides a full-stack technical solution for the processing of IoT and Industrial Internet BigData. It is an efficient and easy-to-use IoT Big Data platform. Compared with typical Big Data platforms such as Hadoop, TDengine has the following distinct characteristics: + +- **Performance improvement over 10 times**: An innovative data storage structure is defined, with each single core can process at least 20,000 requests per second, insert millions of data points, and read more than 10 million data points, which is more than 10 times faster than other existing general database. +- **Reduce the cost of hardware or cloud services to 1/5**: Due to its ultra-performance, TDengine’s computing resources consumption is less than 1/5 of other common Big Data solutions; through columnar storage and advanced compression algorithms, the storage consumption is less than 1/10 of other general databases. +- **Full-stack time-series data processing engine**: Integrate database, message queue, cache, stream computing, and other functions, and the applications do not need to integrate with software such as Kafka/Redis/HBase/Spark/HDFS, thus greatly reducing the complexity cost of application development and maintenance. +- **Powerful analysis functions**: Data from ten years ago or one second ago, can all be queried based on a specified time range. Data can be aggregated on a timeline or multiple devices. Ad-hoc queries can be made at any time through Shell, Python, R, and MATLAB. +- **Seamless connection with third-party tools**: Integration with Telegraf, Grafana, EMQ, HiveMQ, Prometheus, MATLAB, R, etc. without even one single line of code. OPC, Hadoop, Spark, etc. will be supported in the future, and more BI tools will be seamlessly connected to. +- **Zero operation cost & zero learning cost**: Installing clusters is simple and quick, with real-time backup built-in, and no need to split libraries or tables. Similar to standard SQL, TDengine can support RESTful, Python/Java/C/C++/C#/Go/Node.js, and similar to MySQL with zero learning cost. 
+ +With TDengine, the total cost of ownership of typical IoT, Internet of Vehicles, and Industrial Internet Big Data platforms can be greatly reduced. However, it should be pointed out that due to making full use of the characteristics of IoT time-series data, TDengine cannot be used to process general data from web crawlers, microblogs, WeChat, e-commerce, ERP, CRM, and other sources. + +![TDengine Technology Ecosystem](page://images/eco_system.png) + +
Figure 1. TDengine Technology Ecosystem
+ +## Overall Scenarios of TDengine + +As an IoT Big Data platform, the typical application scenarios of TDengine are mainly presented in the IoT category, with users having a certain amount of data. The following sections of this document are mainly aimed at IoT-relevant systems. Other systems, such as CRM, ERP, etc., are beyond the scope of this article. + +### Characteristics and Requirements of Data Sources + +From the perspective of data sources, designers can analyze the applicability of TDengine in target application systems as following. + +| **Data Source Characteristics and Requirements** | **Not Applicable** | **Might Be Applicable** | **Very Applicable** | **Description** | +| -------------------------------------------------------- | ------------------ | ----------------------- | ------------------- | :----------------------------------------------------------- | +| A huge amount of total data | | | √ | TDengine provides excellent scale-out functions in terms of capacity, and has a storage structure matching high compression ratio to achieve the best storage efficiency in the industry. | +| Data input velocity is occasionally or continuously huge | | | √ | TDengine's performance is much higher than other similar products. It can continuously process a large amount of input data in the same hardware environment, and provide a performance evaluation tool that can easily run in the user environment. | +| A huge amount of data sources | | | √ | TDengine is designed to include optimizations specifically for a huge amount of data sources, such as data writing and querying, which is especially suitable for efficiently processing massive (tens of millions or more) data sources. | + +### System Architecture Requirements + +| **System Architecture Requirements** | **Not Applicable** | **Might Be Applicable** | **Very Applicable** | **Description** | +| ------------------------------------------------- | ------------------ | ----------------------- | ------------------- | ------------------------------------------------------------ | +| Require a simple and reliable system architecture | | | √ | TDengine's system architecture is very simple and reliable, with its own message queue, cache, stream computing, monitoring and other functions, and no need to integrate any additional third-party products. | +| Require fault-tolerance and high-reliability | | | √ | TDengine has cluster functions to automatically provide high-reliability functions such as fault tolerance and disaster recovery. | +| Standardization specifications | | | √ | TDengine uses standard SQL language to provide main functions and follow standardization specifications. | + +### System Function Requirements + +| **System Architecture Requirements** | **Not Applicable** | **Might Be Applicable** | **Very Applicable** | **Description** | +| ------------------------------------------------- | ------------------ | ----------------------- | ------------------- | ------------------------------------------------------------ | +| Require completed data processing algorithms built-in | | √ | | TDengine implements various general data processing algorithms, but has not properly handled all requirements of different industries, so special types of processing shall be processed at the application level. | +| Require a huge amount of crosstab queries | | √ | | This type of processing should be handled more by relational database systems, or TDengine and relational database systems should fit together to implement system functions. 
| + +### System Performance Requirements + +| **System Architecture Requirements** | **Not Applicable** | **Might Be Applicable** | **Very Applicable** | **Description** | +| ------------------------------------------------- | ------------------ | ----------------------- | ------------------- | ------------------------------------------------------------ | +| Require larger total processing capacity | | | √ | TDengine’s cluster functions can easily improve processing capacity via multi-server-cooperating. | +| Require high-speed data processing | | | √ | TDengine’s storage and data processing are designed to be optimized for IoT, can generally improve the processing speed by multiple times than other similar products. | +| Require fast processing of fine-grained data | | | √ | TDengine has achieved the same level of performance with relational and NoSQL data processing systems. | + +### System Maintenance Requirements + +| **System Architecture Requirements** | **Not Applicable** | **Might Be Applicable** | **Very Applicable** | **Description** | +| ------------------------------------------------- | ------------------ | ----------------------- | ------------------- | ------------------------------------------------------------ | +| Require system with high-reliability | | | √ | TDengine has a very robust and reliable system architecture to implement simple and convenient daily operation with streamlined experiences for operators, thus human errors and accidents are eliminated to the greatest extent. | +| Require controllable operation learning cost | | | √ | As above. | +| Require abundant talent supply | √ | | | As a new-generation product, it’s still difficult to find talents with TDengine experiences from market. However, the learning cost is low. As the vendor, we also provide extensive operation training and counselling services. | diff --git a/documentation20/en/02.getting-started/docs.md b/documentation20/en/02.getting-started/docs.md new file mode 100644 index 0000000000000000000000000000000000000000..3c9d9ac6af54cfd49a4b2700c8c79773f08a2120 --- /dev/null +++ b/documentation20/en/02.getting-started/docs.md @@ -0,0 +1,221 @@ +# Quick Start + +## Quick Install + +TDengine software consists of 3 parts: server, client, and alarm module. At the moment, TDengine server only runs on Linux (Windows, mac OS and more OS supports will come soon), but client can run on either Windows or Linux. TDengine client can be installed and run on Windows or Linux. Applications based-on any OSes can all connect to server taosd via a RESTful interface. About CPU, TDengine supports X64/ARM64/MIPS64/Alpha64, and ARM32、RISC-V, other more CPU architectures will be supported soon. You can set up and install TDengine server either from the [source code](https://www.taosdata.com/en/getting-started/#Install-from-Source) or the [packages](https://www.taosdata.com/en/getting-started/#Install-from-Package). + +### Install from Source + +Please visit our [TDengine github page](https://github.com/taosdata/TDengine) for instructions on installation from the source code. + +### Install from Docker Container + +Please visit our [TDengine Official Docker Image: Distribution, Downloading, and Usage](https://www.taosdata.com/blog/2020/05/13/1509.html). + +### Install from Package + +It’s extremely easy to install for TDengine, which takes only a few seconds from downloaded to successful installed. The server installation package includes clients and connectors. 
We provide 3 installation packages, which you can choose according to actual needs: + +Click [here](https://www.taosdata.com/cn/getting-started/#%E9%80%9A%E8%BF%87%E5%AE%89%E8%A3%85%E5%8C%85%E5%AE%89%E8%A3%85) to download the install package. + +For more about installation process, please refer [TDengine Installation Packages: Install and Uninstall](https://www.taosdata.com/blog/2019/08/09/566.html), and [Video Tutorials](https://www.taosdata.com/blog/2020/11/11/1941.html). + +## Quick Launch + +After installation, you can start the TDengine service by the `systemctl` command. + +```bash +$ systemctl start taosd +``` + +Then check if the service is working now. + +```bash +$ systemctl status taosd +``` + +If the service is running successfully, you can play around through TDengine shell `taos`. + +**Note:** + +- The `systemctl` command needs the **root** privilege. Use **sudo** if you are not the **root** user. +- To get better product feedback and improve our solution, TDengine will collect basic usage information, but you can modify the configuration parameter **telemetryReporting** in the system configuration file taos.cfg, and set it to 0 to turn it off. +- TDengine uses FQDN (usually hostname) as the node ID. In order to ensure normal operation, you need to set hostname for the server running taosd, and configure DNS service or hosts file for the machine running client application, to ensure the FQDN can be resolved. +- TDengine supports installation on Linux systems with[ systemd ](https://en.wikipedia.org/wiki/Systemd)as the process service management, and uses `which systemctl` command to detect whether `systemd` packages exist in the system: + + ```bash + $ which systemctl + ``` + +If `systemd` is not supported in the system, TDengine service can also be launched via `/usr/local/taos/bin/taosd` manually. + +## TDengine Shell Command Line + +To launch TDengine shell, the command line interface, in a Linux terminal, type: + +```bash +$ taos +``` + +The welcome message is printed if the shell connects to TDengine server successfully, otherwise, an error message will be printed (refer to our [FAQ](https://www.taosdata.com/en/faq) page for troubleshooting the connection error). The TDengine shell prompt is: + +```cmd +taos> +``` + +In the TDengine shell, you can create databases, create tables and insert/query data with SQL. Each query command ends with a semicolon. It works like MySQL, for example: + +```mysql +create database demo; + +use demo; + +create table t (ts timestamp, speed int); + +insert into t values ('2019-07-15 00:00:00', 10); + +insert into t values ('2019-07-15 01:00:00', 20); + +select * from t; + +ts | speed | + +=================================== + +19-07-15 00:00:00.000| 10| + +19-07-15 01:00:00.000| 20| + +Query OK, 2 row(s) in set (0.001700s) +``` + +Besides the SQL commands, the system administrator can check system status, add or delete accounts, and manage the servers. + +### Shell Command Line Parameters + +You can configure command parameters to change how TDengine shell executes. Some frequently used options are listed below: + +- -c, --config-dir: set the configuration directory. It is */etc/taos* by default. +- -h, --host: set the IP address of the server it will connect to. Default is localhost. +- -s, --commands: set the command to run without entering the shell. +- -u, -- user: user name to connect to server. Default is root. +- -p, --password: password. Default is 'taosdata'. +- -?, --help: get a full list of supported options. 
+ +Examples: + +```bash +$ taos -h 192.168.0.1 -s "use db; show tables;" +``` + +### Run SQL Command Scripts + +Inside TDengine shell, you can run SQL scripts in a file with source command. + +```mysql +taos> source ; +``` + +### Shell Tips + +- Use up/down arrow key to check the command history +- To change the default password, use "alter user" command +- Use ctrl+c to interrupt any queries +- To clean the schema of local cached tables, execute command `RESET QUERY CACHE` + +## Experience TDengine’s Lightning Speed + +After starting the TDengine server, you can execute the command `taosdemo` in the Linux terminal. + +```bash +$ taosdemo +``` + +Using this command, a STable named `meters` will be created in the database `test` There are 10k tables under this stable, named from `t0` to `t9999`. In each table there are 100k rows of records, each row with columns (`f1`, `f2` and `f3`. The timestamp is from "2017-07-14 10:40:00 000" to "2017-07-14 10:41:39 999". Each table also has tags `areaid` and `loc`: `areaid` is set from 1 to 10, `loc` is set to "beijing" or "shanghai". + +It takes about 10 minutes to execute this command. Once finished, 1 billion rows of records will be inserted. + +In the TDengine client, enter sql query commands and then experience our lightning query speed. + +- query total rows of records: + +```mysql +taos> select count(*) from test.meters; +``` + +- query average, max and min of the total 1 billion records: + +```mysql +taos> select avg(f1), max(f2), min(f3) from test.meters; +``` + +- query the number of records where loc="beijing": + +```mysql +taos> select count(*) from test.meters where loc="beijing"; +``` + +- query the average, max and min of total records where areaid=10: + +```mysql +taos> select avg(f1), max(f2), min(f3) from test.meters where areaid=10; +``` + +- query the average, max, min from table t10 when aggregating over every 10s: + +```mysql +taos> select avg(f1), max(f2), min(f3) from test.t10 interval(10s); +``` + +**Note**: you can run command `taosdemo` with many options, like number of tables, rows of records and so on. To know more about these options, you can execute `taosdemo --help` and then take a try using different options. + +## Client and Alarm Module + +If your client and server running on different machines, please install the client separately. Linux and Windows packages are provided: + +- TDengine-client-2.0.10.0-Linux-x64.tar.gz(3.0M) +- TDengine-client-2.0.10.0-Windows-x64.exe(2.8M) +- TDengine-client-2.0.10.0-Windows-x86.exe(2.8M) + +Linux package of Alarm Module is as following (please refer [How to Use Alarm Module](https://github.com/taosdata/TDengine/blob/master/alert/README_cn.md)): + +- TDengine-alert-2.0.10.0-Linux-x64.tar.gz (8.1M) + +## List of Supported Platforms + +List of platforms supported by TDengine server + +| | **CentOS 6/7/8** | **Ubuntu 16/18/20** | **Other Linux** | UnionTech UOS | NeoKylin | LINX V60/V80 | +| ------------------ | ---------------- | ------------------- | --------------- | ------------- | -------- | ------------ | +| X64 | ● | ● | | ○ | ● | ● | +| Loongson MIPS64 | | | ● | | | | +| Kunpeng ARM64 | | ○ | ○ | | ● | | +| SWCPU Alpha64 | | | ○ | ● | | | +| FT ARM64 | | ○Ubuntu Kylin | | | | | +| Hygon X64 | ● | ● | ● | ○ | ● | ● | +| Rockchip ARM64 | | | ○ | | | | +| Allwinner ARM64 | | | ○ | | | | +| Actions ARM64 | | | ○ | | | | + +Note: ● has been verified by official tests; ○ has been verified by unofficial tests. 
+ +List of platforms supported by TDengine client and connectors + +At the moment, TDengine connectors can support a wide range of platforms, including hardware platforms such as X64/X86/ARM64/ARM32/MIPS/Alpha, and development environments such as Linux/Win64/Win32. + +Comparison matrix as following: + +| **CPU** | **X64 64bit** | | | **X86 32bit** | **ARM64** | **ARM32** | **MIPS Godson** | **Alpha Shenwei** | **X64 TimecomTech** | +| ----------- | ------------- | --------- | --------- | ------------- | --------- | --------- | --------------- | ----------------- | ------------------- | +| **OS** | **Linux** | **Win64** | **Win32** | **Win32** | **Linux** | **Linux** | **Linux** | **Linux** | **Linux** | +| **C/C++** | ● | ● | ● | ○ | ● | ● | ● | ● | ● | +| **JDBC** | ● | ● | ● | ○ | ● | ● | ● | ● | ● | +| **Python** | ● | ● | ● | ○ | ● | ● | ● | -- | ● | +| **Go** | ● | ● | ● | ○ | ● | ● | ○ | -- | -- | +| **NodeJs** | ● | ● | ○ | ○ | ● | ● | ○ | -- | -- | +| **C#** | ○ | ● | ● | ○ | ○ | ○ | ○ | -- | -- | +| **RESTful** | ● | ● | ● | ● | ● | ● | ● | ● | ● | + +Note: ● has been verified by official tests; ○ has been verified by unofficial tests. + +Please visit [Connectors](https://www.taosdata.com/en/documentation/connector) section for more detailed information. diff --git a/documentation20/en/03.architecture/docs.md b/documentation20/en/03.architecture/docs.md new file mode 100644 index 0000000000000000000000000000000000000000..ce8dd6c8be75ae87afcd51fbbecbaf97a274ba3e --- /dev/null +++ b/documentation20/en/03.architecture/docs.md @@ -0,0 +1,434 @@ +# Data Model and Architecture + +## Data Model + +### A Typical IoT Scenario + +In typical IoT, Internet of Vehicles and Operation Monitoring scenarios, there are often many different types of data collecting devices that collect one or more different physical metrics. However, for the collection devices of the same type, there are often many specific collection devices distributed in places. BigData processing system aims to collect all kinds of data, and then calculate and analyze them. For the same kind of devices, the data collected are very regular. Taking smart meters as an example, assuming that each smart meter collects three metrics of current, voltage and phase, the collected data are similar to the following table: + +
+| Device ID | Time Stamp    | current | voltage | phase | location         | groupId |
+| :-------- | :------------ | :------ | :------ | :---- | :--------------- | :------ |
+| d1001     | 1538548685000 | 10.3    | 219     | 0.31  | Beijing.Chaoyang | 2       |
+| d1002     | 1538548684000 | 10.2    | 220     | 0.23  | Beijing.Chaoyang | 3       |
+| d1003     | 1538548686500 | 11.5    | 221     | 0.35  | Beijing.Haidian  | 3       |
+| d1004     | 1538548685500 | 13.4    | 223     | 0.29  | Beijing.Haidian  | 2       |
+| d1001     | 1538548695000 | 12.6    | 218     | 0.33  | Beijing.Chaoyang | 2       |
+| d1004     | 1538548696600 | 11.8    | 221     | 0.28  | Beijing.Haidian  | 2       |
+| d1002     | 1538548696650 | 10.3    | 218     | 0.25  | Beijing.Chaoyang | 3       |
+| d1001     | 1538548696800 | 12.3    | 221     | 0.31  | Beijing.Chaoyang | 2       |
+
+Table 1: Smart meter example data (current, voltage and phase are the collected metrics; location and groupId are static tags)
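+
+Before walking through the model in detail, here is a minimal sketch of what it looks like in TDengine SQL. The DDL below mirrors the `meters` example used elsewhere in this documentation and is shown only to make the model concrete; the concepts it relies on (STable, one table per collection point, tags) are explained step by step in the following sections.
+
+```mysql
+-- One STable defines the schema shared by all smart meters.
+CREATE TABLE meters (ts TIMESTAMP, current FLOAT, voltage INT, phase FLOAT)
+  TAGS (location BINARY(64), groupId INT);
+
+-- One table per data collection point, created from the STable template
+-- together with that meter's static tag values.
+CREATE TABLE d1001 USING meters TAGS ('Beijing.Chaoyang', 2);
+
+-- The first record of d1001 from Table 1 (timestamp in milliseconds).
+INSERT INTO d1001 VALUES (1538548685000, 10.3, 219, 0.31);
+```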
+ +Each data record contains the device ID, timestamp, collected metrics (current, voltage, phase as above), and static tags (Location and groupId in Table 1) associated with the devices. Each device generates a data record in a pre-defined timer or triggered by an external event. It is a sequence of data points like a stream. + +### Data Characteristics + +As the data points are a series of data points over time, the data points generated by IoT, Internet of Vehicles, and Operation Monitoring have some strong common characteristics: + +1. Metrics are always structured data; +2. There are rarely delete/update operations on collected data; +3. No need for transactions of traditional databases +4. The ratio of reading is lower but write is higher than typical Internet applications; +5. data flow is uniform and can be predicted according to the number of devices and collection frequency; +6. the user pays attention to the trend of data, not a specific value at a specific time; +7. there is always a data retention policy; +8. the data query is always executed in a given time range and a subset of space; +9. in addition to storage and query operations, various statistical and real-time calculation operations are also required; +10. data volume is huge, a system may generate over 10 billion data points in a day. + +By utilizing the above characteristics, TDengine designs the storage and computing engine in a special and optimized way for time-series data, resulting in massive improvements in system efficiency. + +### Relational Database Model + +Since time-series data is most likely to be structured data, TDengine adopts the traditional relational database model to process them with a shallow learning curve. You need to create a database, create tables with schema definitions, then insert data points and execute queries to explore the data. Standard SQL is used, instead of NoSQL’s key-value storage. + +### One Table for One Collection Point + +To utilize this time-series and other data features, TDengine requires the user to create a table for each collection point to store collected time-series data. For example, if there are over 10 millions smart meters, means 10 millions tables shall be created. For the table above, 4 tables shall be created for devices D1001, D1002, D1003, and D1004 to store the data collected. This design has several advantages: + +1. Guarantee that all data from a collection point can be saved in a continuous memory/hard disk space block by block. If queries are applied only on one point in a time range, this design will reduce the random read latency significantly, thus increase read and query speed by orders of magnitude. +2. Since the data generation process of each collection device is completely independent, means each device has its unique data source, thus writes can be carried out in a lock-free manner to greatly improve the speed. +3. Write latency can be significantly reduced too as the data points generated by the same device will arrive in time order, the new data point will be simply appended to a block. + +If the data of multiple devices are written into a table in the traditional way, due to the uncontrollable network delay, the timing of the data from different devices arriving at the server cannot be guaranteed, the writing operation must be protected by locks, and the data of one device cannot be guaranteed to continuously stored together. 
**The method of one table for each data collection point can ensure the optimal performance of insertion and query of a single data collection point to the greatest extent.** + +TDengine suggests using collection point ID as the table name (like D1001 in the above table). Each point may collect one or more metrics (like the current, voltage, phase as above). Each metric has a column in the table. The data type for a column can be int, float, string and others. In addition, the first column in the table must be a timestamp. TDengine uses the time stamp as the index, and won’t build the index on any metrics stored. All data will be stored in columns. + +### STable: A Collection of Data Points in the Same Type + +The method of one table for each point will bring a greatly increasing number of tables, which is difficult to manage. Moreover, applications often need to take aggregation operations between collection points, thus aggregation operations will become complicated. To support aggregation over multiple tables efficiently, the [STable(Super Table)](https://www.taosdata.com/en/documentation/super-table) concept is introduced by TDengine. + +STable is an abstract collection for a type of data point. A STable contains a set of points (tables) that have the same schema or data structure, but with different static attributes (tags). To describe a STable (a combination of data collection points of a specific type), in addition to defining the table structure of the collected metrics, it is also necessary to define the schema of its tag. The data type of tags can be int, float, string, and there can be multiple tags, which can be added, deleted, or modified afterward. If the whole system has N different types of data collection points, N STables need to be established. + +In the design of TDengine, **a table is used to represent a specific data collection point, and STable is used to represent a set of data collection points of the same type**. When creating a table for a specific data collection point, the user uses the definition of STable as a template and specifies the tag value of the specific collection point (table). Compared with the traditional relational database, the table (a data collection point) has static tags, and these tags can be added, deleted, and modified afterward. **A STable contains multiple tables with the same time-series data schema but different tag values.** + +When aggregating multiple data collection points with the same data type, TDEngine will first find out the tables that meet the tag filters from the STables, and then scan the time-series data of these tables to perform aggregation operation, which can greatly reduce the data sets to be scanned, thus greatly improving the performance of aggregation calculation. + +## Cluster and Primary Logic Unit + +The design of TDengine is based on the assumption that one single hardware or software system is unreliable and that no single computer can provide sufficient computing and storage resources to process massive data. Therefore, TDengine has been designed according to a distributed and high-reliability architecture since Day One of R&D, which supports scale-out, so that hardware failure or software failure of any single or multiple servers will not affect the availability and reliability of the system. 
+## Cluster and Primary Logic Unit
+
+The design of TDengine is based on the assumption that any single hardware or software system is unreliable, and that no single computer can provide sufficient computing and storage resources to process massive data. Therefore, TDengine has been designed according to a distributed and high-reliability architecture since Day One of R&D, which supports scale-out, so that hardware or software failure of any single or even multiple servers will not affect the availability and reliability of the system. At the same time, through node virtualization and automatic load-balancing technology, TDengine can make the most efficient use of computing and storage resources in heterogeneous clusters to reduce hardware investment.
+
+### Primary Logic Unit
+
+The logical structure diagram of TDengine's distributed architecture is as follows:
+
+![TDengine architecture diagram](page://images/architecture/structure.png)
+
Picture 1: TDengine architecture diagram
+
+A complete TDengine system runs on one or more physical nodes. Logically, it includes data nodes (dnode), the TDengine application driver (taosc) and applications (app). There are one or more data nodes in the system, which form a cluster. The application interacts with the TDengine cluster through taosc's API. The following is a brief introduction to each logical unit.
+
+**Physical node (pnode)**: A pnode is a computer that runs independently and has its own computing, storage and network capabilities. It can be a physical machine, a virtual machine, or a Docker container with an OS installed. A physical node is identified by its configured FQDN (Fully Qualified Domain Name). TDengine relies entirely on FQDN for network communication. If you don't know about FQDN, please read the blog post "[All about FQDN of TDengine](https://www.taosdata.com/blog/2020/09/11/1824.html)".
+
+**Data node (dnode):** A dnode is a running instance of the TDengine server-side executable taosd on a physical node. A working system must have at least one data node. A dnode contains zero to multiple logical virtual nodes (vnode) and zero or at most one logical management node (mnode). The unique identification of a dnode in the system is its End Point (EP), a combination of the FQDN of the physical node where the dnode is located and the network port number (Port) configured for it. By configuring different ports, a physical node (a physical machine, virtual machine or container) can run multiple instances, i.e., host multiple data nodes.
+
+**Virtual node (vnode)**: To better support data sharding and load balancing, and to prevent data hot spots or skew, data nodes are virtualized into multiple virtual nodes (V2, V3, V4, etc. in the figure). Each vnode is a relatively independent work unit and the basic unit of time-series data storage, with its own running threads, memory space and persistent storage path. A vnode contains a certain number of tables (data collection points). When a new table is created, the system checks whether a new vnode needs to be created. The number of vnodes that can be created on a data node depends on the hardware capacity of the physical node where the data node is located. A vnode belongs to only one DB, but a DB can have multiple vnodes. In addition to the stored time-series data, a vnode also stores the schemas and tag values of its tables. A virtual node is uniquely identified in the system by the EP of its data node and the VGroup ID it belongs to, and is created and managed by the management node.
+
+**Management node (mnode)**: A virtual logical unit (M in the figure) responsible for monitoring and maintaining the running status of all data nodes and for load balancing among them. At the same time, the management node is responsible for the storage and management of metadata (including users, databases, tables, static tags, etc.), so it is also called the Meta Node. Multiple (up to 5) mnodes can be configured in a TDengine cluster, and they automatically form a virtual management node group (M0, M1, M2 in the figure). The mnodes are managed with a master/slave mechanism, and their data is synchronized in a strongly consistent way; any data update operation can only be done on the master. The creation of the mnode cluster is completed automatically by the system, without manual intervention. There is at most one mnode on each dnode, and it is uniquely identified by the EP of the data node it belongs to. Each dnode automatically learns the EPs of the dnodes where all the mnodes in the cluster are located through internal message exchange.
+
+**Virtual node group (VGroup)**: Vnodes on different data nodes can form a virtual node group to ensure the high reliability of the system. The virtual node group is managed in a master/slave fashion: write operations can only be performed on the master vnode, and the system then synchronizes the data to the slave vnodes via replication, thus ensuring that a single copy of data is kept on multiple physical nodes. The number of virtual nodes in a vgroup equals the number of data replicas; if the number of replicas of a DB is N, the system must have at least N data nodes. The number of replicas can be specified by the parameter "replica" when creating a DB, and the default is 1. Using the multi-replica feature of TDengine, high data reliability can be achieved without expensive storage devices such as disk arrays. A virtual node group is created and managed by the management node, which assigns it a system-wide unique ID, the VGroup ID. If two virtual nodes have the same VGroup ID, they belong to the same group and their data back each other up. The number of virtual nodes in a virtual node group can be changed dynamically, and can be as low as one, i.e., no data replication. The VGroup ID never changes; even if a virtual node group is deleted, its ID will not be reused.
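+
+For instance, the following statement (a sketch; the database name is illustrative) creates a database whose virtual node groups each hold three replicas of the data:
+
+```mysql
+-- Assumed example: each vgroup of this database will contain 3 vnodes (3 replicas)
+CREATE DATABASE power REPLICA 3;
+```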
+
+**TAOSC**: TAOSC is the driver provided by TDengine to applications. It is responsible for the interaction between applications and the cluster, provides a native C/C++ interface, and is embedded in the JDBC, C#, Python, Go and Node.js connection libraries. Applications interact with the whole cluster through taosc instead of connecting directly to the data nodes in the cluster. This module is responsible for obtaining and caching metadata, and for forwarding requests for insertion, query, etc. to the correct data node; when returning results to the application, taosc also needs to handle the final round of aggregation, sorting, filtering and other operations. For the JDBC, C/C++, C#, Python, Go and Node.js interfaces, this module runs on the physical node where the application is located. At the same time, in order to support a fully distributed RESTful interface, taosc has a running instance on each dnode of the TDengine cluster.
+
+### Node Communication
+
+**Communication mode**: Communication among the data nodes of a TDengine system, and between the application driver and the data nodes, is carried out over TCP/UDP. Considering that in IoT scenarios the data-writing packets are generally not large, TDengine uses UDP in addition to TCP, because UDP is more efficient and is not limited by the number of connections. TDengine implements its own timeout, retransmission and confirmation mechanisms to ensure reliable transmission over UDP. For packets smaller than 15 KB, UDP is adopted; for packets larger than 15 KB, and for query operations, TCP is adopted automatically. At the same time, TDengine will automatically compress/decompress and digitally sign/authenticate the data, depending on the configuration and the data packet. For data replication among data nodes, only TCP is used.
+
+**FQDN configuration:** A data node has one or more FQDNs, which can be specified in the system configuration file taos.cfg with the parameter "fqdn". If it is not specified, the system automatically uses the hostname of the computer as its FQDN. If a node has no proper FQDN, you can set its fqdn configuration parameter to its IP address. However, IP addresses are not recommended, because an IP address may change, and once it changes the cluster no longer works properly. The EP (End Point) of a data node consists of FQDN + Port. When FQDNs are used, you must ensure that the DNS service works properly, or configure the hosts file on the data nodes and on the nodes where applications are located.
+
+**Port configuration**: The external port of a data node is determined by the system configuration parameter serverPort in TDengine, and the port for internal cluster communication is serverPort+5. Data replication among data nodes in the cluster occupies one more TCP port, serverPort+10. In order to support multithreading and efficient processing of UDP data, each internal and external UDP connection occupies 5 consecutive ports. Therefore, the total port range of a data node is from serverPort to serverPort+10, for a total of 11 TCP/UDP ports. Make sure the firewall keeps these ports open. Each data node can be configured with a different serverPort.
+
+**Cluster external connection**: A TDengine cluster can accommodate a single data node, multiple data nodes, or even thousands of them. An application only needs to initiate a connection to any one data node in the cluster. The network parameter required for the connection is the End Point (FQDN plus configured port number) of a data node. When starting the CLI application taos, the FQDN of the data node can be specified through the -h option and the configured port number through -p. If the port is not configured, the TDengine system configuration parameter serverPort is adopted.
+
+**Inter-cluster communication**: Data nodes connect with each other through TCP/UDP. When a data node starts, it obtains the EP information of the dnodes where the mnodes are located, and then establishes connections with the mnodes in the system to exchange information. There are three steps to obtain the mnode EP information: 1. Check whether the mnodeEpList file exists; if it does not exist or cannot be opened to obtain the mnode EP information, go to the second step. 2. Check the system configuration file taos.cfg for the configuration parameters firstEp and secondEp (the nodes specified by these two parameters can be ordinary nodes without mnodes; in that case, the node will be redirected to an mnode when it connects); if these two parameters are not set in taos.cfg, or are invalid, go to the third step. 3. Set its own EP as the mnode EP and run independently. After obtaining the mnode EP list, the data node initiates a connection; it joins the working cluster once the connection succeeds. If the connection fails, it tries the next item in the mnode EP list. If all attempts fail, it sleeps for a few seconds and tries again.
+
+**The choice of mnode**: TDengine logically has a management node, but there is no separate executable for it; the server side has only one executable, taosd. So which data node will be the management node? This is determined automatically by the system, without any manual intervention. The principle is as follows: when a data node starts, it checks its own End Point against the obtained mnode EP List. If its EP is in the list, the data node starts the mnode module and becomes an mnode; otherwise, the mnode module is not started. During system operation, due to load balancing, downtime and other reasons, an mnode may migrate to a new dnode, completely transparently and without manual intervention; the decision is made by the mnode itself according to resource usage, and no configuration parameter needs to be changed.
+
+**Add new data nodes:** Once the system has one data node, it is already a working system. Two steps add a new node into the cluster. Step 1: connect to the existing working data node using the TDengine CLI, and add the End Point of the new data node with the command "create dnode" (see the sketch below); Step 2: in the system configuration file taos.cfg of the new data node, set the firstEp and secondEp parameters to the EPs of any two data nodes in the existing cluster. Please refer to the user tutorial for detailed steps. In this way, the cluster is established step by step.
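+
+For example, assuming the new node's End Point is h2.taosdata.com:6030 (a hypothetical FQDN with the default port), Step 1 would look like this in the TDengine CLI:
+
+```mysql
+-- Run via the taos shell on any node already in the cluster; the EP below is hypothetical
+CREATE DNODE "h2.taosdata.com:6030";
+```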
+**Redirection**: Regardless of whether it is a dnode or taosc, it first needs to connect to an mnode. But mnodes are created and maintained automatically by the system, so the user does not know which dnode is running an mnode; TDengine only requires a connection to any working dnode in the system. Since every running dnode maintains the currently running mnode EP List, when it receives a connection request from a newly started dnode or taosc and it is not an mnode itself, it replies with the mnode EP List. After receiving this list, taosc or the newly started dnode tries again to establish a connection, this time to an mnode. When the mnode EP List changes, each data node quickly obtains the latest list through inter-node messaging and notifies taosc.
+
+### A Typical Messaging Process
+
+To explain the relationship between vnode, mnode, taosc and application, and their respective roles, the following is an analysis of a typical data writing process.
+
+![typical process of TDengine](page://images/architecture/message.png)
+
Picture 2: Typical process of TDengine
+
+1. Application initiates a request to insert data through JDBC, ODBC, or other APIs.
+2. Taosc checks its cache for the metadata of the table. If the metadata exists, it goes straight to Step 4; if not, taosc sends a get-metadata request to an mnode.
+3. The mnode returns the metadata of the table to taosc. The metadata contains the schema of the table as well as the vgroup information the table belongs to (the vnode ID and the End Point of the dnode where each replica is located; if the number of replicas is N, there will be N groups of End Points). If taosc does not receive a response from the mnode for a long time and there are multiple mnodes, taosc sends the request to the next mnode.
+4. Taosc initiates an insert request to the master vnode.
+5. After the vnode inserts the data, it replies to taosc, indicating that the insertion was successful. If taosc doesn't get a response from the vnode for a long time, taosc treats the node as offline; in this case, if the database has multiple replicas, taosc issues the insert request to the next vnode in the vgroup.
+6. Taosc notifies the APP that the write was successful.
+
+For Steps 2 and 3, when taosc starts, it does not know the End Points of the mnodes, so it directly initiates a request to the externally serving End Point configured for the cluster. If the dnode that receives the request does not run an mnode, it replies with the mnode EP list, so that taosc re-issues the get-metadata request to the EP of an mnode.
+
+For Steps 4 and 5, without cached information, taosc cannot know which vnode in the virtual node group is the master, so it assumes the first vnode is the master and sends the request to it. If that vnode is not the master, it replies with the actual master, and taosc then sends the request to the reported master. Once a reply of successful insertion is obtained, taosc caches the information of the master node.
+
+The above is the process of inserting data; the processes of querying and computing are exactly the same. Taosc encapsulates and hides all these complicated processes, so applications are not aware of them and need no special handling.
+
+Through the taosc caching mechanism, an mnode needs to be accessed only when a table is operated on for the first time, so the mnode will not become a system bottleneck. However, because the schema and vgroup may change (for example, due to load balancing), taosc interacts with the mnode regularly to automatically update its cache.
+
+## Storage Model and Data Partitioning/Sharding
+
+### Storage Model
+
+The data stored by TDengine includes collected time-series data, metadata related to databases and tables, tag data, etc. All these data are divided into three parts:
+
+- Time-series data: stored in vnodes and composed of data, head and last files. The amount of data is large, and the query volume depends on the application scenario. Out-of-order writing is allowed, but delete operations are not supported for the time being, and update operations are only allowed when the update parameter is set to 1. By adopting the model of one table per collection point, the data of a given time period is stored continuously, and writing to a single table is a simple append operation. Multiple records can be read at one time, ensuring the best performance for both insert and query of a single collection point.
+- Tag data: stored in the meta files of vnodes, supporting the four standard operations of add, delete, modify and query. The amount of data is not large: with N tables there are N records, so it can all be kept in memory. Tag filtering can be very frequent, so TDengine supports multi-core, multi-threaded concurrent queries; with sufficient computing resources, filtering results return in milliseconds even when there are millions of tables.
+- Metadata: stored in the mnode, including system nodes, users, DBs, table schemas and other information. It supports the four standard operations of add, delete, modify and query. The amount of this data is not large and can be kept in memory; moreover, the query volume is not large thanks to client-side caching. Therefore, although TDengine uses centralized storage management for metadata, it will not become a performance bottleneck.
+
+Compared with the typical NoSQL storage model, TDengine stores tag data and time-series data completely separately, which has two major advantages:
+
+- It greatly reduces the redundancy of tag data storage: a typical NoSQL database or time-series database adopts K-V storage, in which the key includes a timestamp, a device ID and various tags. Every record carries duplicated copies of them, wasting storage space. Moreover, if the application needs to add, modify or delete tags on historical data, it has to traverse and rewrite the data, which is extremely expensive.
+- It enables extremely efficient aggregation queries over multiple tables: when performing an aggregation query over multiple tables, TDengine first finds the tables that pass the tag filters, and then locates the corresponding data blocks of only those tables. This greatly reduces the data sets to be scanned and thus greatly improves query efficiency. Moreover, tag data is managed and maintained in a full in-memory structure, so tag queries over tens of millions of tables can return in milliseconds.
+
+### Data Sharding
+
+For large-scale data management, scale-out generally requires a partitioning strategy such as sharding. TDengine implements data sharding via vnodes, and partitions time-series data by time range, with one data file for each time range.
+
+A vnode (virtual data node) provides writing, query and computing functions for the collected time-series data. To facilitate load balancing, data recovery and heterogeneous environments, TDengine splits a data node into multiple vnodes according to its computing and storage resources. The management of these vnodes is done automatically by TDengine and is completely transparent to the application.
+
+For a single data collection point, regardless of the amount of data, a vnode (or a vnode group, if the number of replicas is greater than 1) has enough computing and storage resources to handle it (if a 16-byte record is generated per second, the raw data generated in one year is still less than 0.5 GB). Therefore, TDengine stores all the data of a table (a data collection point) in one vnode, instead of distributing it across two or more dnodes. Moreover, a vnode can store the data of multiple data collection points (tables), with an upper limit of one million tables per vnode. By design, all tables in a vnode belong to the same DB. On a data node, unless specially configured, the number of vnodes owned by a DB will not exceed the number of CPU cores.
+
+When creating a DB, the system does not allocate resources immediately. When a table is created, the system checks whether there is an allocated vnode with free table space. If so, the table is created in that vnode immediately. If not, the system creates a new vnode on a dnode chosen from the cluster according to the current workload, and then creates the table in it. If a DB has multiple replicas, the system creates not a single vnode but a vgroup (virtual data node group). The system has no hard limit on the number of vnodes; it is limited only by the computing and storage resources of the physical nodes.
+
+The metadata of each table (including schema, tags, etc.) is also stored in the vnode instead of centrally in the mnode. This in effect shards the metadata, which allows efficient, parallel tag filtering operations.
+
+### Data Partitioning
+
+In addition to sharding via vnodes, TDengine partitions the time-series data by time range. Each data file contains time-series data of only one time range, whose length is determined by the DB configuration parameter "days". Partitioning by time range also makes it convenient to implement data retention policies efficiently: once a data file exceeds the specified number of days (system configuration parameter "keep"), it is deleted automatically. Moreover, different time ranges can be stored on different paths and storage media, which facilitates hot/cold management of big data and enables tiered storage.
+
+In general, **TDengine splits big data along the two dimensions of vnode and time**, which allows parallel, efficient management and scale-out.
+
+### Load Balancing
+
+Each dnode regularly reports its status (including hard disk space, memory size, CPU, network, number of virtual nodes, etc.) to the mnode (virtual management node), so the mnode knows the status of the entire cluster. Based on the overall status, when the mnode finds an overloaded dnode, it migrates one or more vnodes to other dnodes. During this process, external services keep running, and data insertion, query and computing operations are not affected.
+
+If the mnode has not received a dnode's status for a period of time, the dnode is judged to be offline. If the offline state lasts beyond a certain period (the duration is determined by the configuration parameter "offlineThreshold"), the dnode is forcibly removed from the cluster by the mnode. If the vnodes on this dnode have more than one replica, the system automatically creates new replicas on other dnodes to restore the replica count. If the dnode also runs an mnode and the number of mnode replicas is greater than one, the system automatically creates a new mnode on another dnode as well.
+
+When new data nodes are added to the cluster, bringing new computing and storage resources, the system automatically starts the load-balancing process.
+
+The load-balancing process requires no manual intervention and no application restart; it is automatic and completely transparent to applications. **Note: load balancing is controlled by the parameter "balance", which turns automatic load balancing on or off.**
+
+## Data Writing and Replication Process
+
+If a database has N replicas, a virtual node group has N virtual nodes, of which only one is the master and all the others are slaves. When an application writes a new record to the system, only the master vnode can accept the write request. If a slave vnode receives a write request, the system notifies taosc to redirect the request.
+
+### Master vnode Writing Process
+
+The master vnode follows the writing process below:
+
+Figure 3: TDengine Master writing process
+
+1. The master vnode receives the application's data insertion request, verifies it, and proceeds to the next step;
+2. If the system configuration parameter "walLevel" is greater than 0, the vnode writes the original request packet into the database log file WAL. If walLevel is set to 2 and fsync is set to 0, TDengine also flushes the WAL data to disk immediately, to ensure that even if the system crashes, all data can be recovered from the log file;
+3. If there are multiple replicas, the vnode forwards the data packet to the slave vnodes in the same virtual node group; the forwarded packet carries a version number along with the data;
+4. The data is written into memory and the record is added to the skip list;
+5. The master vnode returns a confirmation message to the application, indicating a successful write;
+6. If any of Steps 2, 3 or 4 fails, the error is returned directly to the application.
+
+### Slave vnode Writing Process
+
+For a slave vnode, the writing process is as follows:
+
+![TDengine Slave Writing Process](page://images/architecture/write_master.png)
+
Picture 3: TDengine Slave Writing Process
+
+1. The slave vnode receives a data insertion request forwarded by the master vnode;
+2. If the system configuration parameter "walLevel" is greater than 0, the vnode writes the original request packet into the database log file WAL. If walLevel is set to 2 and fsync is set to 0, TDengine also flushes the WAL data to disk immediately, to ensure that even if the system crashes, all data can be recovered from the log file;
+3. The data is written into memory and the record is added to the skip list.
+
+Compared with the master vnode, the slave vnode has no forwarding step and no confirmation-reply step, i.e., two steps fewer; writing into memory and into the WAL is exactly the same.
+
+### Remote Disaster Recovery and IDC Migration
+
+As the master and slave processes above show, TDengine adopts asynchronous replication for data synchronization. This greatly improves write performance and makes writes largely insensitive to network delay. By configuring an IDC and a rack number for each physical node, it can be ensured that the virtual nodes of a virtual node group reside on physical nodes in different IDCs and on different racks, thus implementing remote disaster recovery without any other tools.
+
+On the other hand, TDengine supports dynamic modification of the replica number. Once the number of replicas increases, the newly added virtual nodes immediately enter the data synchronization process, and after synchronization is completed they can provide services. During synchronization, the master and the other already-synchronized virtual nodes keep serving. With this feature, TDengine can carry out an IDC migration without service interruption: it is only necessary to add the new physical nodes to the existing cluster, and then remove the old physical nodes after data synchronization is completed.
+
+However, this asynchronous replication method leaves a tiny time window in which written data can be lost. The specific scenario is as follows:
+
+1. The master vnode completed its 5-step operation, confirmed the write success to the APP, and then went down;
+2. The slave vnode received the forwarded write request, but failed before writing the packet to the log in Step 2;
+3. The slave vnode becomes the new master, and that one record is lost.
+
+In theory, with asynchronous replication there is no absolute guarantee against data loss. However, this window is extremely small: data is lost only if the master and a slave fail at the same time, right after the write was confirmed to the application.
+
+Note: Remote disaster recovery and no-downtime IDC migration are only supported by the Enterprise Edition. **Hint: This function is not available yet**
+
+### Master/slave Selection
+
+Each vnode maintains a version number. The version number is persisted when the in-memory data is persisted, and it increases by one with every data update operation, whether on time-series data or on metadata.
+
+When a vnode starts, its role (master or slave) is undetermined and its data is in an unsynchronized state. It establishes TCP connections with the other vnodes in its virtual node group and exchanges status with them, including the version number and its role. Through this exchange, the system carries out a master-selection process, with the following rules:
+
+1. If there is only one replica, it is always the master;
+2. When all replicas are online, the one with the latest version is the master;
+3. When more than half of the virtual nodes are online and one of them is a slave, that virtual node automatically becomes the master;
+4. For cases 2 and 3, if multiple virtual nodes meet the requirement, the first vnode in the virtual node group list is selected as the master.
+
+See [TDengine 2.0 Data Replication Module Design](https://www.taosdata.com/cn/documentation/architecture/replica/) for more information on the data replication process.
+
+### Synchronous Replication
+
+For scenarios with stronger data consistency requirements, asynchronous data replication is not suitable, because there is a small probability of data loss. So TDengine also provides a synchronous replication mechanism. When creating a database, in addition to the number of replicas, the user can specify a parameter "quorum". If quorum is greater than one, every time the master forwards a write to the replicas, it must wait for "quorum-1" confirmation replies before informing the application that the data has been written successfully. If the "quorum-1" confirmation replies are not received within a certain period of time, the master vnode returns an error to the application.
+
+With synchronous replication, system performance decreases and latency increases. Because metadata requires strong consistency, data synchronization between mnodes defaults to synchronous replication.
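+
+For example, the statement below (a sketch; the name and values are illustrative) creates a database with three replicas in which every write must be confirmed by at least one slave before it is acknowledged:
+
+```mysql
+-- Assumed example: 3 replicas; the master waits for quorum-1 = 1 slave confirmation per write
+CREATE DATABASE demo REPLICA 3 QUORUM 2;
+```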
+
+## Caching and Persistence
+
+### Caching
+
+TDengine adopts a time-driven cache management strategy (First-In-First-Out, FIFO), also known as a write-driven cache management mechanism. This strategy is different from the read-driven data caching mode (Least-Recently-Used, LRU): it puts the most recently written data directly into the system buffer, and when the buffer reaches a threshold, the earliest data is written to disk in batches. Generally speaking, users of IoT data are most interested in the newly generated data, that is, the current status. TDengine takes full advantage of this by keeping the most recently arrived (current state) data in the buffer.
+
+TDengine provides millisecond-level data retrieval to users through its query functions. Keeping the recently arrived data directly in the buffer allows the system to respond more quickly to queries for the latest record or batch of records, and provides faster query response overall. In this sense, **by setting appropriate configuration parameters, TDengine can be used as a data cache without deploying Redis or any additional caching system**, which effectively simplifies the system architecture and reduces operating costs. Note, however, that after TDengine is restarted the buffer is emptied: the previously cached data is written to disk in batches and is not reloaded into the buffer, as a dedicated key-value caching system would do.
+
+Each vnode has its own independent memory, composed of multiple memory blocks of fixed size, and different vnodes are completely isolated from each other. When writing data, similar to writing a log, data is appended to memory sequentially, while each vnode also maintains its own skip list for fast lookup. When more than one third of the memory blocks are used, flushing to disk starts, and subsequent writes go to a new memory block. In this way, one third of the memory blocks in a vnode always keep the latest data, achieving caching and fast search at the same time. The number of memory blocks of a vnode is determined by the configuration parameter "blocks", and the size of the memory blocks is determined by the configuration parameter "cache".
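+
+Since "cache" and "blocks" are per-database parameters, the buffer available to each vnode can be sized when the database is created. A sketch, assuming the name and values are illustrative and "cache" is given in MB:
+
+```mysql
+-- Assumed example: each vnode gets 6 memory blocks of 16 MB each as its write buffer
+CREATE DATABASE demo CACHE 16 BLOCKS 6;
+```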
+
+### Persistent Storage
+
+TDengine uses a data-driven method to write the data in the buffer to hard disk for persistent storage. When the cached data in a vnode reaches a certain volume, TDengine starts a disk-writing thread to write the cached data into persistent storage, so that subsequent data writing is not blocked. TDengine opens a new database log file when this flush starts, and deletes the old log file after the flush completes successfully, to avoid unlimited log growth.
+
+To make full use of the characteristics of time-series data, TDengine splits the data of a vnode in persistent storage into multiple files, each file holding only a fixed number of days of data, determined by the system configuration parameter "days". With this arrangement, given the start and end dates of a query, the data files to open can be located immediately, without any index, which greatly speeds up reading.
+
+Collected data generally has a retention period, determined by the system configuration parameter "keep". Data files older than this number of days are automatically deleted by the system to free up storage space.
+
+Given "days" and "keep", the total number of data files in a vnode is keep/days. The total number of data files should be neither too large nor too small; 10 to 100 is appropriate. Based on this principle, a reasonable "days" can be chosen. In the current version, the parameter "keep" can be modified, but the parameter "days" cannot be changed once it is set.
+
+In each data file, the data of a table is stored in blocks. A table can have one or more data file blocks. Within a file block, data is stored in columns, occupying continuous storage space, which greatly improves reading speed. The size of a file block is determined by the system parameter "maxRows" (the maximum number of records per block), with a default value of 4096. This value should be neither too large nor too small: too large, and locating the data for a given search takes longer; too small, and the data block index becomes too large while compression efficiency drops, slowing down reads.
+
+Each data file (with a .data postfix) has a corresponding index file (with a .head postfix). The index file contains summary information about the data blocks of each table, recording the offset of each data block in the data file, the start and end times of the data, and other information, so that the system can quickly locate the data to be read. Each data file also has a corresponding last file (with a .last postfix), designed to prevent data block fragmentation on disk. If the number of records written from a table does not reach the system configuration parameter "minRows" (minimum number of records per block), they are stored in the last file first; at the next flush, the newly written records are merged with the records in the last file and then written into the data file.
+
+When data is written to disk, the system configuration parameter "comp" decides whether to compress it. TDengine provides three options: no compression, one-stage compression and two-stage compression, corresponding to comp values of 0, 1 and 2 respectively. One-stage compression is carried out according to the type of the data, using algorithms such as delta-delta coding, the simple 8B method, zig-zag coding and LZ4. Two-stage compression applies a general-purpose compression algorithm on top of one-stage compression, achieving a higher compression ratio.
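+
+Since "days", "keep" and "comp" are all database-level parameters, a retention and compression policy can be set when the database is created. A sketch (the name and values are illustrative):
+
+```mysql
+-- Assumed example: 10-day data files, 365-day retention (about 36 files per vnode), two-stage compression
+CREATE DATABASE demo DAYS 10 KEEP 365 COMP 2;
+```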
+
+### Tiered Storage
+
+By default, TDengine saves all data under the /var/lib/taos directory, with the data files of each vnode in a separate subdirectory. To expand the storage space, minimize file-reading bottlenecks and improve data throughput, TDengine can be configured, via the system parameter "dataDir", to use multiple mounted hard disks at the same time. In addition, TDengine provides tiered data storage: data can be stored on different storage media according to the timestamps of the data files. For example, the latest data can be stored on SSD, data older than a week on local hard disk, and data older than four weeks on a network storage device, thus reducing storage costs while ensuring efficient data access. The movement of data across storage media is done automatically by the system and is completely transparent to applications. Tiered storage is also configured through the system parameter "dataDir".
+
+dataDir format is as follows:
+```
+dataDir data_path [tier_level]
+```
+
+Where data_path is the folder path of a mount point, and tier_level is the storage tier of the media. The higher the storage tier, the older the data files stored on it. Multiple hard disks can be mounted at the same storage tier, and data files on the same tier are distributed over all the hard disks within that tier. TDengine supports up to 3 storage tiers, so tier_level can be 0, 1 or 2. When configuring dataDir, exactly one mount path must be given without a tier_level; it is called the special mount disk (path). This mount path defaults to level-0 storage media and contains special file links; it must not be removed, otherwise it will have a devastating effect on the written data.
+
+Suppose a physical node has six mountable hard disks /mnt/disk1, /mnt/disk2, ..., /mnt/disk6, where disk1 and disk2 are to be designated as level-0 storage media, disk3 and disk4 as level 1, and disk5 and disk6 as level 2. Disk1 is the special mount disk. This can be configured in /etc/taos/taos.cfg as follows:
+
+```
+dataDir /mnt/disk1/taos
+dataDir /mnt/disk2/taos 0
+dataDir /mnt/disk3/taos 1
+dataDir /mnt/disk4/taos 1
+dataDir /mnt/disk5/taos 2
+dataDir /mnt/disk6/taos 2
+```
+
+Mounted disks can also be non-local network disks, as long as the system can access them.
+
+Note: Tiered Storage is only supported in the Enterprise Edition.
+
+## Data Query
+
+TDengine provides a variety of query processing functions for tables and STables. In addition to common aggregation queries, it also provides window queries and statistical aggregation functions for time-series data. Query processing in TDengine requires the collaboration of the client, vnodes and mnodes.
+
+### Single Table Query
+
+The parsing and validation of SQL statements are completed on the client side: the SQL statement is parsed into an Abstract Syntax Tree (AST), which is then validated. Then the metadata of the table specified in the query is requested from the management node (mnode).
+
+According to the End Point information in the metadata, the query request is serialized and sent to the data node (dnode) where the table is located. After receiving the query, the dnode identifies the targeted virtual node (vnode) and forwards the message to the vnode's query execution queue. The query execution thread of the vnode establishes the basic query execution environment, immediately acknowledges the query request, and starts executing the query at the same time.
+
+When the client fetches the query result, the worker thread in the query execution queue of the dnode waits for the vnode's execution thread to finish before returning the result to the client.
+
+### Aggregation by Time Axis, Downsampling, Interpolation
+
+A remarkable feature that distinguishes time-series data from ordinary data is that every record has a timestamp, so aggregating data over the time axis is an important function that sets time-series databases apart from common databases. In this respect it is similar to window queries in stream computing engines.
+
+The keyword "interval" is introduced in TDengine to split the time axis into fixed-length time windows and aggregate the data within each window as needed. For example:
+
+```mysql
+select count(*) from d1001 interval(1h);
+```
+
+For the data collected by device D1001, this returns the number of records stored per hour, using 1-hour time windows.
+
+In application scenarios where query results must be obtained continuously, a missing data interval would also leave a hole in the results. TDengine provides a strategy to interpolate the results of time-axis aggregation, using the keyword "fill". For example:
+
+```mysql
+select count(*) from d1001 interval(1h) fill(prev);
+```
+
+For the data collected by device D1001, this counts the number of records per hour; if a certain hour has no data, the statistics of the previous hour are returned instead. TDengine provides forward interpolation (prev), linear interpolation (linear), NULL filling (NULL), and specific-value filling (value).
+
+### Multi-table Aggregation Query
+
+TDengine creates a separate table for each data collection point, but in practical applications it is often necessary to aggregate data from different collection points. To perform such aggregations efficiently, TDengine introduces the STable concept. A STable represents a specific type of data collection point: it is a set of tables whose schemas are completely identical but which each carry their own static tags. There can be multiple tags, and they can be added, deleted and modified at any time. Applications can aggregate over, or run statistics on, all or a subset of the tables under a STable by specifying tag filters, which greatly simplifies application development. The process is shown in the following figure:
+
+![Diagram of multi-table aggregation query](page://images/architecture/multi_tables.png)
+
Picture 4: Diagram of multi-table aggregation query
+
+1. Application sends a query condition to the system;
+2. taosc sends the STable name to the meta node (management node);
+3. The management node sends back to taosc the vnode list associated with the STable;
+4. taosc sends the computing request, together with the tag filters, to the multiple data nodes corresponding to these vnodes;
+5. Each vnode first finds, in memory, the set of tables within its own node that satisfy the tag filters, then scans the stored time-series data, completes the corresponding aggregation, and returns the result to taosc;
+6. taosc finally aggregates the results returned by the multiple data nodes and sends them back to the application.
+
+Since TDengine stores tag data and time-series data separately in the vnode, by filtering the tag data in memory, the set of tables that need to participate in the aggregation is found first, which greatly reduces the volume of data to scan and improves aggregation speed. At the same time, because the data is distributed over multiple vnodes/dnodes, the aggregation is carried out concurrently in multiple vnodes, which further increases the speed. Aggregation functions and most other operations on ordinary tables are also applicable to STables, and the syntax is exactly the same. Please see TAOS SQL for details.
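+
+For example, with the smart-meter STable from the data-modeling chapter, a single statement can aggregate over all the tables whose tags pass the filter (a sketch; the tag value is illustrative):
+
+```mysql
+-- Assumed example: average voltage per location, across all meters whose location tag starts with "Beijing"
+SELECT AVG(voltage) FROM meters WHERE location LIKE 'Beijing%' GROUP BY location;
+```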
+
+### Precomputation
+
+To effectively improve query processing performance, and based on the immutable nature of IoT data, statistical information about the data stored in a data block is recorded in the head of the block, including the maximum value, the minimum value and the sum. We call this a precomputing unit. If a query involves all the data of an entire data block, the precomputed results are used directly and the contents of the data block do not need to be read at all. Since the precomputed data is much smaller than the actual data blocks stored on disk, using precomputed results greatly reduces read IO pressure and speeds up the query process when disk IO is the bottleneck. The precomputation mechanism is similar to PostgreSQL's BRIN (Block Range Index).
diff --git a/documentation20/en/04.model/docs.md b/documentation20/en/04.model/docs.md
new file mode 100644
index 0000000000000000000000000000000000000000..5ab5e0c6a56d0b5d534386752988cb1adae3b2fa
--- /dev/null
+++ b/documentation20/en/04.model/docs.md
@@ -0,0 +1,74 @@
+# Data Modeling
+
+TDengine adopts a relational data model, so we need to build "databases" and "tables". For a specific application scenario, it is therefore necessary to consider the design of the database, the STables and the ordinary tables. This section does not discuss detailed syntax rules, only concepts.
+
+Please watch the [video tutorial](https://www.taosdata.com/blog/2020/11/11/1945.html) for data modeling.
+
+## Create a Database
+
+Different types of data collection points often have different data characteristics, including frequency of data collection, length of data retention, number of replicas, size of data blocks, whether data may be updated, and so on. To ensure that TDengine works with great efficiency in various scenarios, TDengine suggests creating tables with different data characteristics in different databases, because each database can be configured with its own storage strategy. When creating a database, in addition to the SQL-standard options, the application can specify a variety of parameters such as retention duration, number of replicas, number of memory blocks, time precision, the max and min number of records in a file block, whether the data is compressed, and the number of days covered by one data file. For example:
+
+```mysql
+CREATE DATABASE power KEEP 365 DAYS 10 BLOCKS 4 UPDATE 1;
+```
+
+The above statement creates a database named "power". The data of this database is kept for 365 days (it will be automatically deleted 365 days later), one data file is created per 10 days, the number of memory blocks is 4, and updating data is allowed. For detailed syntax and parameters, please refer to [Data Management section of TAOS SQL](https://www.taosdata.com/en/documentation/taos-sql#management).
+
+After the database is created, please use the SQL command USE to switch to the new database, for example:
+
+```mysql
+USE power;
+```
+
+This switches the database used by the current connection to "power". Otherwise, before operating on a table, you would need to write "database_name.table_name" to specify which database the table belongs to.
+
+**Note:**
+
+- Any table or STable belongs to a database. Before creating a table, a database must be created first.
+- Tables in two different databases cannot be JOINed.
+
+## Create a STable
+
+An IoT system often has many types of devices, such as smart meters, transformers, buses and switches for power grids. In order to facilitate aggregation among multiple tables, in TDengine it is necessary to create a STable for each type of data collection point. Taking the smart meter in Table 1 as an example, you can use the following SQL command to create a STable:
+
+```mysql
+CREATE STABLE meters (ts timestamp, current float, voltage int, phase float) TAGS (location binary(64), groupId int);
+```
+
+**Note:** The STABLE keyword in this statement needs to be written as TABLE in versions before 2.0.15.
+
+Just like creating an ordinary table, you need to provide the table name ("meters" in the example) and the table schema, that is, the definition of the data columns. The first column must be a timestamp ("ts" in the example); the other columns are the physical metrics collected (current, voltage, phase in the example), and their data types can be int, float, string, etc. In addition, you need to provide the schema of the tags (location and groupId in the example), whose data types can also be int, float, string and so on. Static attributes of a collection point can often be used as tags, such as its geographic location, device model, device group ID, administrator ID, etc. The tag schema can be added to, deleted from and modified afterwards. Please refer to the [STable Management section of TAOS SQL](https://www.taosdata.com/cn/documentation/taos-sql#super-table) for specific definitions and details.
+
+Each type of data collection point needs its own STable, so an IoT system often has multiple STables. For the power grid, we would build STables for smart meters, transformers, buses, switches, and so on. A device may also have multiple data collection points (for example, on a wind turbine, some collection points capture parameters such as current and voltage, while others capture environmental parameters such as temperature, humidity and wind direction); in this case, multiple STables need to be established for the corresponding types of collection points. All the physical metrics contained in one STable must be collected at the same time (with a consistent timestamp).
+
+A STable allows up to 1024 columns. If the number of physical metrics collected at a collection point exceeds 1024, multiple STables are needed to handle them. A system can have multiple DBs, and a DB can have one or more STables.
+
+## Create a Table
+
+TDengine builds a table independently for each data collection point. Similar to standard relational databases, a table has a name and a schema; in addition, it can carry one or more tags. To create a table, you use a STable as the template and specify the specific tag values. Taking the smart meter in Table 1 as an example, the following SQL command can be used to create the table:
+
+```mysql
+CREATE TABLE d1001 USING meters TAGS ("Beijing.Chaoyang", 2);
+```
+
+Here d1001 is the table name and meters is the name of the STable, followed by the specific tag values: "Beijing.Chaoyang" for the tag location, and 2 for the tag groupId. Although the tag values must be specified when the table is created, they can be modified afterwards. Please refer to the [Table Management section of TAOS SQL](https://www.taosdata.com/en/documentation/taos-sql#table) for details.
+
+**Note:** At present, TDengine does not technically prevent using a STable of one database (dbA) as the template for creating a table in another database (dbB). This usage will be prohibited later, and it is not recommended.
+
+TDengine suggests using the globally unique ID of a data collection point as the table name (such as the device serial number). If there is no unique ID, multiple IDs can be combined into one. It is not recommended to use a unique ID as a tag value.
+
+**Automatic table creation**: In some special scenarios, it is not known in advance whether the table for a certain data collection point exists when data is written. In this case, the table can be created with the auto-create syntax while writing the data; if the table already exists, no new table is created. For example:
+
+```mysql
+INSERT INTO d1001 USING meters TAGS ("Beijing.Chaoyang", 2) VALUES (now, 10.2, 219, 0.32);
+```
+
+The SQL statement above inserts the record (now, 10.2, 219, 0.32) into table d1001. If table d1001 does not exist yet, it is automatically created using the STable meters as the template, with the tag values "Beijing.Chaoyang" and 2.
+
+For the detailed syntax of automatic table creation, please refer to the "[Automatic Table Creation When Inserting Records](https://www.taosdata.com/en/documentation/taos-sql#auto_create_table)" section.
+
+## Multi-column Model vs Single-column Model
+
+TDengine supports a multi-column model: as long as the physical metrics are collected simultaneously by a data collection point (with a consistent timestamp), they can be placed in one STable as different columns. At the other extreme is the single-column model, in which each collected physical metric gets its own table, and thus each type of physical metric gets its own STable; for example, one STable each would be created for current, voltage and phase.
+
+TDengine recommends using the multi-column model as much as possible, because of its higher insertion and storage efficiency. However, in some scenarios the set of collected metrics changes frequently; in that case, the multi-column model requires frequent modifications to the STable schema, which complicates the application, so the single-column model is recommended instead.
diff --git a/documentation20/en/05.insert/docs.md b/documentation20/en/05.insert/docs.md
new file mode 100644
index 0000000000000000000000000000000000000000..88746ea60867b37e5956075f88c48ebd8276dfaa
--- /dev/null
+++ b/documentation20/en/05.insert/docs.md
@@ -0,0 +1,282 @@
+# Efficient Data Writing
+
+TDengine supports multiple ways to write data, including SQL, Prometheus, Telegraf, EMQ MQTT Broker, HiveMQ Broker, CSV files, etc. Kafka, OPC and other interfaces will be provided in the future. Data can be inserted one record at a time or in batches, for one data collection point or for many at the same time. TDengine supports multi-threaded insertion, out-of-order data insertion, and insertion of historical data.
+
+## SQL Writing
+
+Applications insert data by executing SQL insert statements through the C/C++, JDBC, GO or Python connectors, and users can manually enter SQL insert statements through the TAOS Shell. For example, the following insert writes one record to table d1001:
+
+```mysql
+INSERT INTO d1001 VALUES (1538548685000, 10.3, 219, 0.31);
+```
+
+TDengine supports writing multiple records at a time. For example, the following command writes two records to table d1001:
+
+```mysql
+INSERT INTO d1001 VALUES (1538548684000, 10.2, 220, 0.23) (1538548696650, 10.3, 218, 0.25);
+```
+
+TDengine also supports writing to multiple tables at a time. For example, the following command writes two records to d1001 and one record to d1002:
+
+```mysql
+INSERT INTO d1001 VALUES (1538548685000, 10.3, 219, 0.31) (1538548695000, 12.6, 218, 0.33) d1002 VALUES (1538548696800, 12.3, 221, 0.31);
+```
+
+For the SQL INSERT grammar, please refer to [Taos SQL insert](https://www.taosdata.com/en/documentation/taos-sql#insert).
+
+**Tips:**
+
+- To improve writing efficiency, batch writing is needed: the more records written in a batch, the higher the insertion efficiency. However, a record cannot exceed 16 KB, and the total length of an SQL statement cannot exceed 64 KB (configurable via the parameter maxSQLLength, up to a maximum of 1 MB).
+- TDengine supports multi-threaded parallel writing. To further improve writing speed, a client should use more than 20 threads writing in parallel. Beyond a certain threshold, however, adding threads no longer increases speed and may even decrease it, because too-frequent thread switching brings extra overhead.
+- For the same table, if the timestamp of a newly inserted record already exists, the new record is discarded by default (if the database was not created with UPDATE 1); that is, the timestamp must be unique within a table. If an application generates records automatically, it is very likely that duplicate timestamps will be generated, so the number of records successfully inserted may be smaller than the number the application tried to insert. If the UPDATE 1 option was used when creating the database, inserting a new record with an existing timestamp overwrites the original record.
+- The timestamp of written data must be no older than the current time minus the value of the configuration parameter keep, and no newer than the current time plus the value of the configuration parameter days. If keep is configured as 3650 days, data older than 3650 days cannot be written; if days is configured as 2, data with timestamps more than 2 days in the future cannot be written.
+
+## Direct Writing of Prometheus
+
+As a graduated project of the Cloud Native Computing Foundation, [Prometheus](https://www.prometheus.io/) is widely used for performance monitoring, including K8S performance monitoring. TDengine provides a simple tool, [Bailongma](https://github.com/taosdata/Bailongma), which only needs a simple configuration in Prometheus, without any code, to write the data collected by Prometheus directly into TDengine, automatically creating the databases and related table entries in TDengine according to rules. The blog post [Use Docker Container to Quickly Build a Devops Monitoring Demo](https://www.taosdata.com/blog/2020/02/03/1189.html) is an example of using Bailongma to write Prometheus and Telegraf data into TDengine.
+
+### Compile blm_prometheus From Source
+
+Users need to download the source code of [Bailongma](https://github.com/taosdata/Bailongma) from GitHub, then compile it into an executable with the Golang compiler. Before you start compiling, complete the following preparations:
+
+- A server running Linux
+- Golang version 1.10 or higher installed
+- An appropriate TDengine version. Because the TDengine client dynamic link library is used, the installed TDengine version must match the server side; for example, if the server version is TDengine 2.0.0, install the same version on the Linux server where Bailongma is located (it can be the same server as TDengine, or a different one)
+
+The Bailongma project has a folder, blm_prometheus, which holds the Prometheus writing API. The compiling process is as follows:
+
+```bash
+cd blm_prometheus
+go build
+```
+
+If everything goes well, an executable named blm_prometheus will be generated in the same directory.
+
+### Install Prometheus
+
+Download and install Prometheus following the instructions on its official website. [Download address](https://prometheus.io/download/)
+
+### Configure Prometheus
+
+Read the Prometheus [configuration document](https://prometheus.io/docs/prometheus/latest/configuration/configuration/) and add the following configuration to the remote_write section of the Prometheus configuration file:
+
+- url: the URL provided by the Bailongma API service; refer to the blm_prometheus startup example section below
+
+After Prometheus is launched, you can check whether data is written successfully by querying with the taos client.
+
+### Launch blm_prometheus
+
+blm_prometheus has the following options, which you can configure when launching it:
+
+```sh
+--tdengine-name
+
+If TDengine is installed on a server with a domain name, you can also access TDengine by configuring this to its domain name. In a K8S environment, it can be configured as the service name under which TDengine runs.
+
+--batch-size
+
+blm_prometheus assembles the received Prometheus data into TDengine writing requests. This parameter controls the number of data items carried in a single writing request sent to TDengine.
+
+--dbname
+
+The name of the database created in TDengine; blm_prometheus automatically creates a database named dbname in TDengine. The default value is prometheus.
+
+--dbuser
+
+The user name used to access TDengine; the default value is 'root'.
+
+--dbpassword
+
+The password used to access TDengine; the default value is 'taosdata'.
+
+--port
+
+The port number blm_prometheus uses to serve Prometheus.
+```
+
+### Example
+
+Launch an API service for blm_prometheus with the following command:
+
+```bash
+./blm_prometheus -port 8088
+```
+
+Assuming that the IP address of the server where blm_prometheus is located is "10.1.2.3", the URL shall be added to the configuration file of Prometheus as:
+
+```yaml
+remote_write:
+  - url: "http://10.1.2.3:8088/receive"
+```
+
+### Query written data of Prometheus
+
+The format of the data generated by Prometheus is as follows:
+
+```json
+{
+  Timestamp: 1576466279341,
+  Value: 37.000000,
+  apiserver_request_latencies_bucket {
+    component="apiserver",
+    instance="192.168.99.116:8443",
+    job="kubernetes-apiservers",
+    le="125000",
+    resource="persistentvolumes",
+    scope="cluster",
+    verb="LIST",
+    version="v1"
+  }
+}
+```
+
+Where apiserver_request_latencies_bucket is the name of the time series collected by Prometheus, and the tags of the time series are in the {} that follows. blm_prometheus automatically creates a STable in TDengine with the name of the time series, and converts the tags in {} into the tag values of TDengine, with Timestamp as the timestamp and value as the value of the time series. Therefore, in the TDengine client, you can check whether this data was successfully written through the following instructions.
+
+```mysql
+use prometheus;
+
+select * from apiserver_request_latencies_bucket;
+```
+
+## Direct Writing of Telegraf
+
+[Telegraf](https://www.influxdata.com/time-series-platform/telegraf/) is a popular open source tool for IT operation data collection. TDengine provides a simple tool, [Bailongma](https://github.com/taosdata/Bailongma), which only needs a few configurations in Telegraf, without any code, to write the data collected by Telegraf directly into TDengine and automatically create databases and related table entries in TDengine according to rules. The blog post [Use Docker Container to Quickly Build a Devops Monitoring Demo](https://www.taosdata.com/blog/2020/02/03/1189.html) gives an example of using Bailongma to write Prometheus and Telegraf data into TDengine.
+
+### Compile blm_telegraf From Source Code
+
+Users need to download the source code of [Bailongma](https://github.com/taosdata/Bailongma) from GitHub, then compile it into an executable using the Golang compiler. Before you start compiling, complete the following preparations:
+
+- A server running Linux OS
+- Golang version 1.10 or higher installed
+- An appropriate TDengine version. Because the client dynamic link library of TDengine is used, the same version of TDengine as the server side must be installed; for example, if the server version is TDengine 2.0.0, install the same version on the Linux server where Bailongma is located (it can be on the same server as TDengine, or on a different one)
+
+The Bailongma project has a folder, blm_telegraf, which holds the Telegraf writing API. The compiling process is as follows:
+
+```bash
+cd blm_telegraf
+
+go build
+```
+
+If everything goes well, an executable of blm_telegraf will be generated in the corresponding directory.
+
+### Install Telegraf
+
+At the moment, TDengine supports Telegraf version 1.7.4 and above. Users can download the installation package from Telegraf's website according to their current operating system. The download address is: https://portal.influxdata.com/downloads
+
+### Configure Telegraf
+
+Modify the TDengine-related configurations in the Telegraf configuration file /etc/telegraf/telegraf.conf.
+
+In the output plugins section, add the [[outputs.http]] configuration:
+
+- url: The URL provided by the bailongma API service; please refer to the example section below
+- data_format: "json"
+- json_timestamp_units: "1ms"
+
+In the agent section:
+
+- hostname: The machine name that distinguishes different collection devices; it is necessary to ensure its uniqueness
+- metric_batch_size: 100, which is the maximum number of records per batch written by Telegraf. Increasing this number can reduce the request sending frequency of Telegraf.
+
+For information on how to use Telegraf to collect data, and more about Telegraf in general, please refer to the official Telegraf [documentation](https://docs.influxdata.com/telegraf/v1.11/).
+
+### Launch blm_telegraf
+
+blm_telegraf has the following options, which can be set to tune its configuration at launch.
+
+```sh
+--host
+
+The IP address of the TDengine server; the default is null
+
+--batch-size
+
+blm_telegraf assembles the received Telegraf data into a TDengine writing request. This parameter controls the number of data pieces carried in a writing request sent to TDengine at a time.
+
+--dbname
+
+Set a name for the database created in TDengine. blm_telegraf will automatically create a database named dbname in TDengine; the default value is prometheus.
+
+--dbuser
+
+Set the user name to access TDengine; the default value is 'root'
+
+--dbpassword
+
+Set the password to access TDengine; the default value is 'taosdata'
+
+--port
+
+The port number blm_telegraf uses to serve Telegraf.
+```
+
+### Example
+
+Launch an API service for blm_telegraf with the following command:
+
+```bash
+./blm_telegraf -host 127.0.0.1 -port 8089
+```
+
+Assuming that the IP address of the server where blm_telegraf is located is "10.1.2.3", the URL shall be added to the configuration file of Telegraf as:
+
+```yaml
+url = "http://10.1.2.3:8089/telegraf"
+```
+
+### Query written data of Telegraf
+
+The format of the data generated by Telegraf is as follows:
+
+```json
+{
+  "fields": {
+    "usage_guest": 0,
+    "usage_guest_nice": 0,
+    "usage_idle": 89.7897897897898,
+    "usage_iowait": 0,
+    "usage_irq": 0,
+    "usage_nice": 0,
+    "usage_softirq": 0,
+    "usage_steal": 0,
+    "usage_system": 5.405405405405405,
+    "usage_user": 4.804804804804805
+  },
+
+  "name": "cpu",
+  "tags": {
+    "cpu": "cpu2",
+    "host": "bogon"
+  },
+  "timestamp": 1576464360
+}
+```
+
+Where the name field is the name of the time series collected by Telegraf, and the tag field holds the tags of the time series. blm_telegraf automatically creates a STable in TDengine with the name of the time series, and converts the tag field into the tag values of TDengine, with timestamp as the timestamp and the fields values as the values of the time series. Therefore, in the TDengine client, you can check whether this data was successfully written through the following instructions.
+
+```mysql
+use telegraf;
+
+select * from cpu;
+```
+
+## Direct Writing of EMQ Broker
+
+MQTT is a popular data transmission protocol in the IoT. TDengine can easily access the data received by an MQTT broker and write it to TDengine.
+
+[EMQ](https://github.com/emqx/emqx) is open source MQTT broker software. Without any code, you only need to configure "rules" in the EMQ Dashboard to write MQTT data directly into TDengine. EMQ X supports storing data to TDengine by sending it to a web service, and also provides a native TDengine driver in the Enterprise Edition for direct data storage. Please refer to the [EMQ official documents](https://docs.emqx.io/broker/latest/cn/rule/rule-example.html#%E4%BF%9D%E5%AD%98%E6%95%B0%E6%8D%AE%E5%88%B0-tdengine) for more details.
+
+## Direct Writing of HiveMQ Broker
+
+[HiveMQ](https://www.hivemq.com/) is an MQTT broker that provides Free Personal and Enterprise Edition versions. It is mainly used for enterprise and emerging machine-to-machine (M2M) communication and internal transmission, meeting requirements for scalability, easy management, and security. HiveMQ provides an open source plug-in development kit, and you can store data to TDengine via the HiveMQ extension-TDengine. Refer to the [HiveMQ extension-TDengine documentation](https://github.com/huskar-t/hivemq-tdengine-extension/blob/b62a26ecc164a310104df57691691b237e091c89/README.md) for more details.
diff --git a/documentation20/en/06.queries/docs.md b/documentation20/en/06.queries/docs.md
new file mode 100644
index 0000000000000000000000000000000000000000..c4f1359820a28b390e84be93e077fecb1d5ede0e
--- /dev/null
+++ b/documentation20/en/06.queries/docs.md
@@ -0,0 +1,99 @@
+# Efficient Data Querying
+
+## Main Query Features
+
+TDengine uses SQL as its query language. Applications can send SQL statements through the C/C++, Java, Go, and Python connectors, and users can run ad-hoc SQL queries manually through the Command Line Interface (CLI) tool TAOS Shell provided by TDengine. TDengine supports the following query functions:
+
+- Single-column and multi-column data queries
+- Multiple filters for tags and numeric values: >, <, =, <>, like, etc.
+- Group by, Order by, Limit/Offset on aggregation results
+- The four arithmetic operations (+, -, *, /) on numeric columns and aggregation results
+- Timestamp-aligned join queries (implicit joins)
+- Multiple aggregation/calculation functions: count, max, min, avg, sum, twa, stddev, leastsquares, top, bottom, first, last, percentile, apercentile, last_row, spread, diff, etc.
+
+For example, in TAOS Shell, the records with voltage > 215 are queried from table d1001, sorted in descending order by timestamp, and only two records are returned.
+
+```mysql
+taos> select * from d1001 where voltage > 215 order by ts desc limit 2;
+           ts            |        current        |   voltage   |        phase          |
+======================================================================================
+ 2018-10-03 14:38:16.800 |             12.30000  |         221 |              0.31000  |
+ 2018-10-03 14:38:15.000 |             12.60000  |         218 |              0.33000  |
+Query OK, 2 row(s) in set (0.001100s)
+```
+
+To meet the needs of IoT scenarios, TDengine supports several special functions, such as twa (time weighted average), spread (difference between maximum and minimum), and last_row (last record). More functions related to IoT scenarios will be added. TDengine also supports continuous queries.
+
+For the specific query syntax, please see the [Data Query section of TAOS SQL](https://www.taosdata.com/cn/documentation/taos-sql#select).
+
+## Multi-table Aggregation Query
+
+In IoT scenarios, there are often multiple data collection points of the same type. TDengine uses the concept of a STable to describe a certain type of data collection point, and ordinary tables to describe specific data collection points. At the same time, TDengine uses tags to describe the static attributes of data collection points; a given data collection point has specific tag values. By specifying filters on tags, TDengine provides an efficient method to aggregate and query the sub-tables of a STable (the data collection points of a certain type). Aggregation functions and most operations on ordinary tables are applicable to STables, and the syntax is exactly the same.
+
+**Example 1**: In TAOS Shell, look up the average voltages collected by all smart meters in Beijing, grouped by location:
+
+```mysql
+taos> SELECT AVG(voltage) FROM meters GROUP BY location;
+       avg(voltage)        |            location            |
+=============================================================
+             222.000000000 |     Beijing.Haidian            |
+             219.200000000 |     Beijing.Chaoyang           |
+Query OK, 2 row(s) in set (0.002136s)
+```
+
+**Example 2**: In TAOS Shell, look up the number of records and the maximum current of all smart meters with groupId 2 in the past 24 hours:
+
+```mysql
+taos> SELECT count(*), max(current) FROM meters where groupId = 2 and ts > now - 24h;
+     count(*)  |    max(current)  |
+==================================
+            5  |             13.4 |
+Query OK, 1 row(s) in set (0.002136s)
+```
+
+TDengine only allows aggregation queries between tables belonging to the same STable, which means aggregation queries between different STables are not supported. The Data Query section of TAOS SQL indicates for each query operation whether STables are supported.
+
+## Down Sampling Query, Interpolation
+
+In IoT scenarios, it is often necessary to aggregate the collected data over time intervals through down sampling. TDengine provides a simple keyword, interval, which makes query operations over time windows extremely simple. For example, the current values collected by smart meter d1001 are summed every 10 seconds:
+
+```mysql
+taos> SELECT sum(current) FROM d1001 INTERVAL(10s);
+           ts            |       sum(current)        |
+======================================================
+ 2018-10-03 14:38:00.000 |              10.300000191 |
+ 2018-10-03 14:38:10.000 |              24.900000572 |
+Query OK, 2 row(s) in set (0.000883s)
+```
+
+The down sampling operation is also applicable to STables, for example, summing the current values collected by all smart meters in Beijing every second:
+
+```mysql
+taos> SELECT SUM(current) FROM meters where location like "Beijing%" INTERVAL(1s);
+           ts            |       sum(current)        |
+======================================================
+ 2018-10-03 14:38:04.000 |              10.199999809 |
+ 2018-10-03 14:38:05.000 |              32.900000572 |
+ 2018-10-03 14:38:06.000 |              11.500000000 |
+ 2018-10-03 14:38:15.000 |              12.600000381 |
+ 2018-10-03 14:38:16.000 |              36.000000000 |
+Query OK, 5 row(s) in set (0.001538s)
+```
+
+The down sampling operation also supports a time offset, for example, summing the current values collected by all smart meters every second, but with each time window starting at an offset of 500 milliseconds:
+
+```mysql
+taos> SELECT SUM(current) FROM meters INTERVAL(1s, 500a);
+           ts            |       sum(current)        |
+======================================================
+ 2018-10-03 14:38:04.500 |              11.189999809 |
+ 2018-10-03 14:38:05.500 |              31.900000572 |
+ 2018-10-03 14:38:06.500 |              11.600000000 |
+ 2018-10-03 14:38:15.500 |              12.300000381 |
+ 2018-10-03 14:38:16.500 |              35.000000000 |
+Query OK, 5 row(s) in set (0.001521s)
+```
+
+In IoT scenarios, it is difficult to synchronize the timestamps of data collected at different points, while many analysis algorithms (such as FFT) need the data to be strictly aligned at equal time intervals. Many systems require users to write their own programs to handle this, but the down sampling operation of TDengine solves it easily. If there is no collected data in an interval, TDengine also provides an interpolation calculation function.
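+
+The same kind of down sampling query can be issued through the C connector. The following sketch is illustrative: it assumes an already-established connection, uses an arbitrary time range, and uses the FILL(PREV) clause from the TAOS SQL syntax referenced below to fill empty windows with the previous value:
+
+```c
+// Down-sampling query with interpolation for empty time windows (sketch).
+#include <stdio.h>
+#include <taos.h>
+
+void run_downsampling_query(TAOS *taos) {
+  // FILL(PREV) repeats the previous window's value when a window has no data;
+  // the time range below is an illustrative assumption.
+  TAOS_RES *res = taos_query(taos,
+      "SELECT SUM(current) FROM d1001 "
+      "WHERE ts >= '2018-10-03 14:38:00.000' AND ts <= '2018-10-03 14:39:00.000' "
+      "INTERVAL(10s) FILL(PREV)");
+  if (taos_errno(res) != 0) {
+    printf("query failed: %s\n", taos_errstr(res));
+    taos_free_result(res);
+    return;
+  }
+  int num_fields = taos_num_fields(res);
+  TAOS_FIELD *fields = taos_fetch_fields(res);
+  TAOS_ROW row;
+  while ((row = taos_fetch_row(res)) != NULL) {
+    char line[256];
+    taos_print_row(line, row, fields, num_fields);  // format one row as text
+    puts(line);
+  }
+  taos_free_result(res);
+}
+```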
+
+For details of the syntax rules, please refer to the [Time-dimension Aggregation section of TAOS SQL](https://www.taosdata.com/en/documentation/taos-sql#aggregation).
\ No newline at end of file
diff --git a/documentation20/en/07.advanced-features/docs.md b/documentation20/en/07.advanced-features/docs.md
new file mode 100644
index 0000000000000000000000000000000000000000..cebbb4a269047b956359252bbdb71fb1a4ba8ff8
--- /dev/null
+++ b/documentation20/en/07.advanced-features/docs.md
@@ -0,0 +1,360 @@
+# Advanced Features
+
+## Continuous Query
+
+A continuous query is a query executed by TDengine periodically with a sliding window; it is a simplified, timer-driven form of stream computing. A continuous query can be applied to a table or a STable automatically and periodically, and the result set can be passed to the application directly via a callback function, or written into a new table in TDengine. The query is always executed on a specified time window (the window size is specified by the parameter interval), and this window slides forward as time flows (the sliding period is specified by the parameter sliding).
+
+Continuous queries in TDengine are time-driven and can be defined directly in TAOS SQL without additional operations. With continuous queries, results can be generated conveniently and quickly according to the time window, thereby down sampling the original collected data. After the user defines a continuous query through TAOS SQL, TDengine automatically launches the query at the end of each complete time period and pushes the calculated result to the user or writes it back to TDengine.
+
+The continuous queries provided by TDengine differ from the time window calculation of ordinary stream computing in the following ways:
+
+- Unlike stream computing, which feeds back calculated results in real time, a continuous query only starts calculating after the time window is closed. For example, if the time period is 1 day, the results of that day will only be generated after 23:59:59.
+- If a historical record is written to a time interval that has already been calculated, the continuous query will not recalculate, and will not push the results to the user again. For the mode of writing back to TDengine, the existing calculated results will not be updated either.
+- When results are pushed, the server does not cache the client's calculation status, nor does it provide Exactly-Once semantics. If the user's application crashes, the continuous query, once relaunched, will only recalculate from the latest complete time window after the relaunch time. If write-back mode is used, TDengine can ensure the validity and continuity of the written-back data.
+
+### How to use continuous query
+
+The following uses the smart meter scenario to introduce the specific use of continuous queries. Suppose we create a STable and sub-tables through the following SQL statements:
+
+```sql
+create table meters (ts timestamp, current float, voltage int, phase float) tags (location binary(64), groupId int);
+create table D1001 using meters tags ("Beijing.Chaoyang", 2);
+create table D1002 using meters tags ("Beijing.Haidian", 2);
+...
+```
+
+We already know that the average voltage of these meters can be computed with one minute as the time window and 30 seconds as the forward increment through the following SQL statement.
+
+```sql
+select avg(voltage) from meters interval(1m) sliding(30s);
+```
+
+Every time this statement is executed, all data will be recalculated. If you need to execute it every 30 seconds to incrementally calculate the data of the latest minute, you can improve the above statement as follows, using a different `startTime` each time and executing it regularly:
+
+```sql
+select avg(voltage) from meters where ts > {startTime} interval(1m) sliding(30s);
+```
+
+There is no problem with this, but TDengine provides a simpler method: just add `create table {tableName} as` before the initial query statement, for example:
+
+```sql
+create table avg_vol as select avg(voltage) from meters interval(1m) sliding(30s);
+```
+
+A new table named `avg_vol` will be created automatically, and then every 30 seconds, TDengine will incrementally execute the SQL statement after `as` and write the query result into this table. The user program only needs to query the data from `avg_vol`. For example:
+
+```mysql
+taos> select * from avg_vol;
+            ts           |        avg_voltage_    |
+===================================================
+ 2020-07-29 13:37:30.000 |            222.0000000 |
+ 2020-07-29 13:38:00.000 |            221.3500000 |
+ 2020-07-29 13:38:30.000 |            220.1700000 |
+ 2020-07-29 13:39:00.000 |            223.0800000 |
+```
+
+It should be noted that the minimum value of the query time window is 10 milliseconds, and there is no upper limit on the time window range.
+
+In addition, TDengine also allows users to specify the starting and ending times of a continuous query. If the start time is not given, the continuous query will start from the time window containing the first original data record; if no end time is given, the continuous query will run permanently; if an end time is specified, the continuous query stops running after the system time reaches it. For example, a continuous query created with the following SQL will run for one hour and then stop automatically.
+
+```mysql
+create table avg_vol as select avg(voltage) from meters where ts > now and ts <= now + 1h interval(1m) sliding(30s);
+```
+
+It should be noted that `now` in the above example refers to the time when the continuous query is created, not the time when it is executed; otherwise the query could never be stopped automatically. In addition, to minimize the problems caused by delayed writing of original data, continuous query calculations in TDengine are slightly delayed. In other words, after a time window has passed, TDengine will not immediately calculate the data of this window, so it takes a while (usually not more than 1 minute) for the calculation result to appear.
+
+### Manage the Continuous Query
+
+Users can view all continuous queries running in the system through the `show streams` command in the console, and can kill a continuous query through the `kill stream` command. Subsequent versions will provide finer-grained and more convenient continuous query management commands.
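+
+A continuous query can also be set up programmatically through the stream API of the C connector (`taos_open_stream` / `taos_close_stream`, documented in the Connectors chapter). The following is a minimal sketch, assuming an already-established connection and the meters STable defined above:
+
+```c
+// Programmatic continuous query via the stream API (sketch).
+#include <stdio.h>
+#include <taos.h>
+
+// Called by TDengine each time a new window result is available.
+void stream_cb(void *param, TAOS_RES *res, TAOS_ROW row) {
+  int num_fields = taos_num_fields(res);
+  TAOS_FIELD *fields = taos_fetch_fields(res);
+  char line[256];
+  taos_print_row(line, row, fields, num_fields);
+  printf("new window result: %s\n", line);
+}
+
+void start_stream(TAOS *taos) {
+  // stime = 0 means the stream computation starts from now.
+  TAOS_STREAM *stream = taos_open_stream(
+      taos, "select avg(voltage) from meters interval(1m) sliding(30s)",
+      stream_cb, 0, NULL, NULL);
+  if (stream == NULL) {
+    printf("failed to open stream\n");
+    return;
+  }
+  // ... later, when stream computing is no longer needed:
+  // taos_close_stream(stream);
+}
+```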
+
+## Publisher/Subscriber
+
+Based on the natural time-series characteristics of the data, data insertion in TDengine is logically consistent with data publishing (pub) in a messaging system; each record can be regarded as a message inserted into the system with a timestamp. At the same time, TDengine stores data in strict accordance with the monotonic increase of time. Essentially, every table in TDengine can be regarded as a standard message queue.
+
+TDengine supports an embedded, lightweight message subscription and publishing service. Using the API provided by the system, users can subscribe to one or more tables in the database using ordinary query statements. The maintenance of the subscription logic and operation status is completed by the client: the client regularly polls the server for new records, and the results are fed back to the client when new records arrive.
+
+The status of TDengine's subscription and publishing services is maintained by the client, not by the TDengine server. Therefore, if the application restarts, it is up to the application to decide from which point in time to obtain the latest data.
+
+In TDengine, there are three main APIs relevant to subscription:
+
+```c
+taos_subscribe
+taos_consume
+taos_unsubscribe
+```
+
+Please refer to the [C/C++ Connector](https://www.taosdata.com/cn/documentation/connector/) for the documentation of these APIs. The following again uses the smart meter scenario as an example to introduce their specific usage (please refer to the previous section "Continuous Query" for the structure of the STable and sub-tables). The complete sample code can be found [here](https://github.com/taosdata/TDengine/blob/master/tests/examples/c/subscribe.c).
+
+If we want to be notified and do some processing when the current of a smart meter exceeds a certain limit (e.g. 10A), there are two methods. One is to query each sub-table separately, record the timestamp of the last piece of data after each query, and then only query all data after this timestamp:
+
+```sql
+select * from D1001 where ts > {last_timestamp1} and current > 10;
+select * from D1002 where ts > {last_timestamp2} and current > 10;
+...
+```
+
+This is indeed feasible, but as the number of meters increases, the number of queries also increases, and the performance of both the client and the server is affected, until the system can no longer cope.
+
+Another method is to query the STable. In this way, no matter how many meters there are, only one query is required:
+
+```sql
+select * from meters where ts > {last_timestamp} and current > 10;
+```
+
+However, how to choose `last_timestamp` becomes a new problem: on the one hand, the time of data generation (the data timestamp) and the time of data storage are generally not the same, and the deviation can sometimes be large; on the other hand, the data of different meters arrive at TDengine at different times. Therefore, if we use the timestamp of the slowest meter's data as `last_timestamp` in the query, we may repeatedly read the data of other meters; if we use the timestamp of the fastest meter, the data of other meters may be missed.
+
+The subscription function of TDengine provides a thorough solution to the above problem.
+
+First, use `taos_subscribe` to create a subscription:
+
+```c
+TAOS_SUB* tsub = NULL;
+if (async) {
+  // create an asynchronous subscription; the callback function will be called every 1s
+  tsub = taos_subscribe(taos, restart, topic, sql, subscribe_callback, &blockFetch, 1000);
+} else {
+  // create a synchronous subscription; 'taos_consume' needs to be called manually
+  tsub = taos_subscribe(taos, restart, topic, sql, NULL, NULL, 0);
+}
+```
+
+Subscriptions in TDengine can be either synchronous or asynchronous, and the above code decides which method to use based on the value of the parameter `async` obtained from the command line. Here, synchronous means that the user program calls `taos_consume` directly to pull data, while asynchronous means that the API calls `taos_consume` in another internal thread and then passes the pulled data to the callback function `subscribe_callback` for processing.
+
+The parameter `taos` is an established database connection and has no special requirements in synchronous mode. In asynchronous mode, however, note that it must not be used by other threads, otherwise unpredictable errors may occur, because the callback function is called in an internal thread of the API, and some TDengine APIs are not thread-safe.
+
+The parameter `sql` is a query statement in which you can specify filters using a where clause. In our example, if you only want to subscribe to data when the current exceeds 10A, you can write it as follows:
+
+```sql
+select * from meters where current > 10;
+```
+
+Note that the starting time is not specified here, so the data of all meters will be read. If you only want to start subscribing from the data of one day ago and do not need earlier historical data, you can add a time condition:
+
+```sql
+select * from meters where ts > now - 1d and current > 10;
+```
+
+The `topic` of the subscription is actually its name. Because the subscription function is implemented in the client API, it is not necessary to ensure that it is globally unique, but it needs to be unique on a single client machine.
+
+If a subscription named `topic` does not exist, the parameter `restart` is meaningless. However, if the user program exits after creating this subscription, then when it starts again and reuses this `topic`, `restart` decides whether to read the data from scratch or from the previous position. In this example, if `restart` is **true** (non-zero), the user program will definitely read all the data. However, if this subscription existed before and some data has already been read, and `restart` is **false** (zero), the user program will not read the previously read data.
+
+The last parameter of `taos_subscribe` is the polling period in milliseconds. In synchronous mode, if the interval between two calls to `taos_consume` is less than this time, `taos_consume` will block until the interval exceeds it. In asynchronous mode, this time is the minimum interval between two calls of the callback function.
+
+The penultimate parameter of `taos_subscribe` is used by the user program to pass additional parameters to the callback function; the subscription API passes it to the callback function as-is, without any processing. This parameter is meaningless in synchronous mode.
+
+After the subscription is created, its data can be consumed. In synchronous mode, the sample code is the `else` branch below:
+
+```c
+if (async) {
+  getchar();
+} else while(1) {
+  TAOS_RES* res = taos_consume(tsub);
+  if (res == NULL) {
+    printf("failed to consume data.");
+    break;
+  } else {
+    print_result(res, blockFetch);
+    getchar();
+  }
+}
+```
+
+Here is a **while** loop: every time the user presses the Enter key, `taos_consume` is called, and its return value is the query result set, exactly the same as `taos_use_result`. In the example, the code using this result set is the function `print_result`:
+
+```c
+void print_result(TAOS_RES* res, int blockFetch) {
+  TAOS_ROW row = NULL;
+  int num_fields = taos_num_fields(res);
+  TAOS_FIELD* fields = taos_fetch_fields(res);
+  int nRows = 0;
+  if (blockFetch) {
+    nRows = taos_fetch_block(res, &row);
+    for (int i = 0; i < nRows; i++) {
+      char temp[256];
+      taos_print_row(temp, row + i, fields, num_fields);
+      puts(temp);
+    }
+  } else {
+    while ((row = taos_fetch_row(res))) {
+      char temp[256];
+      taos_print_row(temp, row, fields, num_fields);
+      puts(temp);
+      nRows++;
+    }
+  }
+  printf("%d rows consumed.\n", nRows);
+}
+```
+
+Here, `taos_print_row` is used to process the subscribed data; in our example, it prints out all eligible records. In asynchronous mode, consuming the subscribed data is simpler:
+
+```c
+void subscribe_callback(TAOS_SUB* tsub, TAOS_RES *res, void* param, int code) {
+  print_result(res, *(int*)param);
+}
+```
+
+To end a data subscription, call `taos_unsubscribe`:
+
+```c
+taos_unsubscribe(tsub, keep);
+```
+
+Its second parameter decides whether to keep the subscription's progress information on the client. If this parameter is **false** (zero), the subscription can only be restarted from scratch the next time `taos_subscribe` is called, no matter what its `restart` parameter is. In addition, progress information is saved in the directory {DataDir}/subscribe/. Each subscription has a file with the same name as its `topic`; deleting that file will likewise make the corresponding subscription start from scratch when it is created next time.
+
+After introducing the code, let's take a look at the actual running effect. Assume that:
+
+- the sample code has been downloaded locally
+- TDengine has been installed on the same machine
+- all the databases, STables and sub-tables required by the example have been created
+
+You can compile and start the sample program by executing the following commands in the directory where the sample code is located:
+
+```shell
+$ make
+$ ./subscribe -sql='select * from meters where current > 10;'
+```
+
+After the sample program starts, open another terminal window, start the TDengine shell, and insert a record with a current of 12A into **D1001**:
+
+```shell
+$ taos
+> use test;
+> insert into D1001 values(now, 12, 220, 1);
+```
+
+Because the current exceeds 10A, you should now see that the sample program outputs this record to the screen. You can continue to insert data and observe the output of the sample program.
+
+### Use data subscription in Java
+
+The subscription function also provides a Java development interface, as described in the [Java Connector](https://www.taosdata.com/cn/documentation/connector/). It should be noted that the Java interface does not provide an asynchronous subscription mode at present, but user programs can achieve the same effect by creating a TimerTask.
+
+The following example introduces its specific use. Its function is basically the same as the C language example described earlier: it subscribes to all records in the database with a current exceeding 10A.
+
+#### Prepare data
+
+```sql
+# Create the power database
+taos> create database power;
+# Switch to the database
+taos> use power;
+# Create a STable
+taos> create table meters(ts timestamp, current float, voltage int, phase int) tags(location binary(64), groupId int);
+# Create sub-tables
+taos> create table d1001 using meters tags ("Beijing.Chaoyang", 2);
+taos> create table d1002 using meters tags ("Beijing.Haidian", 2);
+# Insert test data
+taos> insert into d1001 values("2020-08-15 12:00:00.000", 12, 220, 1),("2020-08-15 12:10:00.000", 12.3, 220, 2),("2020-08-15 12:20:00.000", 12.2, 220, 1);
+taos> insert into d1002 values("2020-08-15 12:00:00.000", 9.9, 220, 1),("2020-08-15 12:10:00.000", 10.3, 220, 1),("2020-08-15 12:20:00.000", 11.2, 220, 1);
+# Query all records with current over 10A from the STable meters
+taos> select * from meters where current > 10;
+           ts            |  current  | voltage | phase |      location      | groupid |
+===========================================================================================================
+ 2020-08-15 12:10:00.000 | 10.30000  |     220 |     1 |  Beijing.Haidian   |       2 |
+ 2020-08-15 12:20:00.000 | 11.20000  |     220 |     1 |  Beijing.Haidian   |       2 |
+ 2020-08-15 12:00:00.000 | 12.00000  |     220 |     1 |  Beijing.Chaoyang  |       2 |
+ 2020-08-15 12:10:00.000 | 12.30000  |     220 |     2 |  Beijing.Chaoyang  |       2 |
+ 2020-08-15 12:20:00.000 | 12.20000  |     220 |     1 |  Beijing.Chaoyang  |       2 |
+Query OK, 5 row(s) in set (0.004896s)
+```
+
+#### Example
+
+```java
+public class SubscribeDemo {
+    private static final String topic = "topic-meter-current-bg-10";
+    private static final String sql = "select * from meters where current > 10";
+
+    public static void main(String[] args) {
+        Connection connection = null;
+        TSDBSubscribe subscribe = null;
+
+        try {
+            Class.forName("com.taosdata.jdbc.TSDBDriver");
+            Properties properties = new Properties();
+            properties.setProperty(TSDBDriver.PROPERTY_KEY_CHARSET, "UTF-8");
+            properties.setProperty(TSDBDriver.PROPERTY_KEY_TIME_ZONE, "UTC-8");
+            String jdbcUrl = "jdbc:TAOS://127.0.0.1:6030/power?user=root&password=taosdata";
+            connection = DriverManager.getConnection(jdbcUrl, properties);
+            subscribe = ((TSDBConnection) connection).subscribe(topic, sql, true); // create a subscription
+            int count = 0;
+            while (count < 10) {
+                TimeUnit.SECONDS.sleep(1); // wait 1 second to avoid calling consume too frequently and putting pressure on the server
+                TSDBResultSet resultSet = subscribe.consume(); // consume data
+                if (resultSet == null) {
+                    continue;
+                }
+                ResultSetMetaData metaData = resultSet.getMetaData();
+                while (resultSet.next()) {
+                    int columnCount = metaData.getColumnCount();
+                    for (int i = 1; i <= columnCount; i++) {
+                        System.out.print(metaData.getColumnLabel(i) + ": " + resultSet.getString(i) + "\t");
+                    }
+                    System.out.println();
+                    count++;
+                }
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+        } finally {
+            try {
+                if (null != subscribe)
+                    subscribe.close(true); // close the subscription
+                if (connection != null)
+                    connection.close();
+            } catch (SQLException throwables) {
+                throwables.printStackTrace();
+            }
+        }
+    }
+}
+```
+
+Run the sample program. First, it consumes all the historical data that meets the query conditions:
+
+```shell
+# java -jar subscribe.jar
+
+ts: 1597464000000 current: 12.0 voltage: 220 phase: 1 location: Beijing.Chaoyang groupid: 2
+ts: 1597464600000 current: 12.3 voltage: 220 phase: 2 location: Beijing.Chaoyang groupid: 2
+ts: 1597465200000 current: 12.2 voltage: 220 phase: 1 location: Beijing.Chaoyang groupid: 2
+ts: 1597464600000 current: 10.3 voltage: 220 phase: 1 location: Beijing.Haidian groupid: 2
+ts: 1597465200000 current: 11.2 voltage: 220 phase: 1 location: Beijing.Haidian groupid: 2
+```
+
+Then, add a new record to the table via the taos client:
+
+```sql
+# taos
+taos> use power;
+taos> insert into d1001 values("2020-08-15 12:40:00.000", 12.4, 220, 1);
+```
+
+Because the current of this record is greater than 10A, the sample program will consume it:
+
+```shell
+ts: 1597466400000 current: 12.4 voltage: 220 phase: 1 location: Beijing.Chaoyang groupid: 2
+```
+
+## Cache
+
+TDengine adopts a time-driven cache management strategy (First-In-First-Out, FIFO), also known as a write-driven cache management mechanism. This strategy differs from the read-driven data caching mode (Least-Recently-Used, LRU): it keeps the most recently written data directly in the system buffer, and when the buffer reaches a threshold, the oldest data is written to disk in batches. Generally speaking, for IoT data, users are most concerned about the most recently generated data, that is, the current status. TDengine takes full advantage of this characteristic by keeping the most recently arrived (current status) data in the buffer.
+
+TDengine provides millisecond-level data retrieval to users through its query functions. Keeping the recently arrived data directly in the buffer allows TDengine to respond more quickly to queries over the latest record or batch of records, and to provide faster query response overall. In this way, TDengine can serve as a data buffer simply by setting appropriate configuration parameters, without deploying an additional caching system, which effectively simplifies the system architecture and reduces operational costs. It should be noted that after TDengine is restarted, the system buffer is emptied: previously cached data is written to disk in batches, and TDengine does not reload previously cached data into the buffer as some dedicated key-value caching systems do.
+
+TDengine allocates a fixed amount of memory as its buffer, which can be configured according to application requirements and hardware resources. By properly setting the buffer size, TDengine can provide extremely high-performance write and query support. Each virtual node (vnode) in TDengine is allocated a separate cache pool when it is created. Each virtual node manages its own cache pool, and different virtual nodes do not share it. All tables belonging to a virtual node share its cache pool.
+
+TDengine manages the memory pool in blocks, and the data within is stored in rows. The memory pool of a vnode is allocated in blocks when the vnode is created, and each memory block is managed in First-In-First-Out order. The size of each block is determined by the system configuration parameter cache, and the number of memory blocks per vnode is determined by the configuration parameter blocks. So for a vnode, the total memory size is cache * blocks; for example, with cache = 16 (MB) and blocks = 6, a vnode uses 96 MB of cache. To be efficient, a cache block should be large enough for each table to store at least dozens of records.
+
+You can quickly obtain the last record of a table or a STable through the function last_row, which is very convenient for showing the real-time status or collected values of each device on a large screen. For example:
+
+```mysql
+select last_row(voltage) from meters where location='Beijing.Chaoyang';
+```
+
+This SQL statement obtains the last recorded voltage value of all smart meters located in Chaoyang District, Beijing.
+
+## Alert
+
+In TDengine application scenarios, alarm monitoring is a common requirement. Conceptually, it requires the program to filter out the data that meets certain conditions from the data of the latest period of time, calculate a result from this data according to a defined formula, and, when the result meets certain conditions and lasts for a certain period of time, notify the user in some form.
+
+In order to meet user needs for alarm monitoring, TDengine provides this function as an independent module. For its installation and use, please refer to the blog post [How to Use TDengine for Alarm Monitoring](https://www.taosdata.com/blog/2020/04/14/1438.html).
\ No newline at end of file
diff --git a/documentation20/en/08.connector/docs.md b/documentation20/en/08.connector/docs.md
new file mode 100644
index 0000000000000000000000000000000000000000..9cbd3952068d8eac23ffa9bcd7497ff158a21d86
--- /dev/null
+++ b/documentation20/en/08.connector/docs.md
@@ -0,0 +1,1046 @@
+# Connectors
+
+TDengine provides many connectors for development, including C/C++, Java, Python, RESTful, Go, Node.js, etc.
+
+![image-connector](page://images/connector.png)
+
+At present, TDengine connectors support a wide range of platforms, including hardware platforms such as X64/X86/ARM64/ARM32/MIPS/Alpha, and development environments such as Linux/Win64/Win32. The compatibility matrix is as follows:
+
+| **CPU** | **X64 64bit** | **X64 64bit** | **X64 64bit** | **X86 32bit** | **ARM64** | **ARM32** | **MIPS Godson** | **Alpha Sunway** | **X64 TimecomTech** |
+| ----------- | ------------- | ------------- | ------------- | ------------- | --------- | --------- | --------------- | ----------------- | ------------------- |
+| **OS** | **Linux** | **Win64** | **Win32** | **Win32** | **Linux** | **Linux** | **Linux** | **Linux** | **Linux** |
+| **C/C++** | ● | ● | ● | ○ | ● | ● | ○ | ○ | ○ |
+| **JDBC** | ● | ● | ● | ○ | ● | ● | ○ | ○ | ○ |
+| **Python** | ● | ● | ● | ○ | ● | ● | ○ | -- | ○ |
+| **Go** | ● | ● | ● | ○ | ● | ● | ○ | -- | -- |
+| **NodeJs** | ● | ● | ○ | ○ | ● | ● | ○ | -- | -- |
+| **C#** | ○ | ● | ● | ○ | ○ | ○ | ○ | -- | -- |
+| **RESTful** | ● | ● | ● | ● | ● | ● | ○ | ○ | ○ |
+
+Note: ● means verified by official tests; ○ means verified by unofficial tests.
+
+Note:
+
+- To access the TDengine database through connectors (except RESTful) on a system without the TDengine server software, it is necessary to install the corresponding version of the client installation package, so that the application driver (the file is named libtaos.so on Linux and taos.dll on Windows) is installed on the system; otherwise, an error that the corresponding library file cannot be found will occur.
+- All APIs that execute SQL statements, such as `taos_query`, `taos_query_a` and `taos_subscribe` in the C/C++ Connector, and the APIs corresponding to them in other languages, can only execute one SQL statement at a time. If the actual parameter contains multiple statements, the behavior is undefined.
+- Users upgrading to TDengine 2.0.8.0 or later must update the JDBC connection: taos-jdbcdriver must be upgraded to 2.0.12 or above.
+- No matter which programming language connector is used, for TDengine version 2.0 and above it is recommended that each thread of a database application establish an independent connection, or that a connection pool be established based on threads, to avoid interference between threads through the "USE statement" state variable of a shared connection (the query and write operations of a connection are, however, thread-safe).
+
+## Steps of Connector Driver Installation
+
+The server should already have the TDengine server package installed. The connector driver installation steps are as follows:
+
+**Linux**
+
+**1. Download from the TAOS Data website (https://www.taosdata.com/cn/all-downloads/)**
+
+* X64 hardware environment: TDengine-client-2.x.x.x-Linux-x64.tar.gz
+* ARM64 hardware environment: TDengine-client-2.x.x.x-Linux-aarch64.tar.gz
+* ARM32 hardware environment: TDengine-client-2.x.x.x-Linux-aarch32.tar.gz
+
+**2. Unzip the package**
+
+Place the package in any directory that the current user can read and write, and then execute the following command:
+
+`tar -xzvf TDengine-client-xxxxxxxxx.tar.gz`
+
+Where xxxxxxxxx needs to be replaced with your actual version string.
+
+**3. Execute the installation script**
+
+After extracting the package, you will see the following files (directories) in the extraction directory:
+
+*install_client.sh*: Installation script for the application driver
+
+*taos.tar.gz*: Application driver installation package
+
+*driver*: TDengine application driver
+
+*connector*: Connectors for various programming languages (go/grafanaplugin/nodejs/python/JDBC)
+
+*examples*: Sample programs for various programming languages (C/C#/go/JDBC/MATLAB/python/R)
+
+Run install_client.sh to install.
+
+**4. Configure taos.cfg**
+
+Edit the taos.cfg file (default path /etc/taos/taos.cfg) and change firstEP to the End Point of the TDengine server, for example: h1.taos.com:6030.
+
+**Tip: If no TDengine service is deployed on this machine and only the application driver is installed, only firstEP needs to be configured in taos.cfg; FQDN does not.**
+
+**Windows x64/x86**
+
+**1. Download from the TAOS Data website (https://www.taosdata.com/cn/all-downloads/)**
+
+* X64 hardware environment: TDengine-client-2.X.X.X-Windows-x64.exe
+* X86 hardware environment: TDengine-client-2.X.X.X-Windows-x86.exe
+
+**2. Execute the installer and select the default values as prompted to complete the installation**
+
+**3. Installation path**
+
+The default installation path is C:\TDengine, with the following files (directories):
+
+*taos.exe*: taos shell command line program
+
+*cfg*: configuration file directory
+
+*driver*: application driver dynamic link library
+
+*examples*: sample programs (bash/C/C#/go/JDBC/Python/Node.js)
+
+*include*: header files
+
+*log*: log files
+
+*unins000.exe*: uninstaller
+
+**4. Configure taos.cfg**
+
+Edit the taos.cfg file (default path C:\TDengine\cfg\taos.cfg) and change firstEP to the End Point of the TDengine server, for example: h1.taos.com:6030.
+
+**Note:**
+
+**1. If you use an FQDN to connect to the server, you must confirm that the DNS of the local network environment has been configured, or add an FQDN addressing record to the hosts file. For example, edit C:\Windows\system32\drivers\etc\hosts and add the following record: 192.168.1.99 h1.taos.com**
+**2. Uninstall: Run unins000.exe to uninstall the TDengine application driver.**
+
+**Installation verification**
+
+After the above installation and configuration are completed, and after confirming that the TDengine service is running normally, you can log in with the taos client.
+
+**Linux environment:**
+
+If you execute taos directly under a Linux shell, you should be able to connect to the TDengine service normally and enter the taos shell interface. For example:
+
+```mysql
+$ taos
+Welcome to the TDengine shell from Linux, Client Version:2.0.5.0
+Copyright (c) 2017 by TAOS Data, Inc. All rights reserved.
+taos> show databases;
+name | created_time | ntables | vgroups | replica | quorum | days | keep1,keep2,keep(D) | cache(MB) | blocks | minrows | maxrows | wallevel | fsync | comp | precision | status |
+=========================================================================================================================================================================================================================
+test | 2020-10-14 10:35:48.617 | 10 | 1 | 1 | 1 | 2 | 3650,3650,3650 | 16 | 6 | 100 | 4096 | 1 | 3000 | 2 | ms | ready |
+log | 2020-10-12 09:08:21.651 | 4 | 1 | 1 | 1 | 10 | 30,30,30 | 1 | 3 | 100 | 4096 | 1 | 3000 | 2 | us | ready |
+Query OK, 2 row(s) in set (0.001198s)
+taos>
+```
+
+**Windows (x64/x86) environment:**
+
+Under cmd, enter the C:\TDengine directory and directly execute taos.exe; you should be able to connect to the TDengine service normally and enter the taos shell interface. For example:
+
+```mysql
+ C:\TDengine>taos
+ Welcome to the TDengine shell from Windows, Client Version:2.0.5.0
+ Copyright (c) 2017 by TAOS Data, Inc. All rights reserved.
+ taos> show databases;
+ name | created_time | ntables | vgroups | replica | quorum | days | keep1,keep2,keep(D) | cache(MB) | blocks | minrows | maxrows | wallevel | fsync | comp | precision | status |
+ ===================================================================================================================================================================================================================================================================
+ test | 2020-10-14 10:35:48.617 | 10 | 1 | 1 | 1 | 2 | 3650,3650,3650 | 16 | 6 | 100 | 4096 | 1 | 3000 | 2 | ms | ready |
+ log | 2020-10-12 09:08:21.651 | 4 | 1 | 1 | 1 | 10 | 30,30,30 | 1 | 3 | 100 | 4096 | 1 | 3000 | 2 | us | ready |
+ Query OK, 2 row(s) in set (0.045000s)
+ taos>
+```
+
+## C/C++ Connector
+
+**Systems supported by the C/C++ connector are as follows:**
+
+| **CPU Type** | **x64 (64bit)** | | | **ARM64** | **ARM32** |
+| -------------------- | --------------- | ------- | ------- | --------- | ------------------ |
+| **OS Type** | Linux | Win64 | Win32 | Linux | Linux |
+| **Supported or Not** | **Yes** | **Yes** | **Yes** | **Yes** | **In development** |
+
+The C/C++ API is similar to MySQL's C API. When using it, an application needs to include the TDengine header file taos.h (after installation, it is located in /usr/local/taos/include):
+
+```C
+#include <taos.h>
+```
+
+Note:
+
+- The TDengine dynamic library needs to be linked at compile time. On Linux the library is libtaos.so, installed at /usr/local/taos/driver; on Windows it is taos.dll, installed at C:\TDengine.
+- Unless otherwise specified, when the return value of an API is an integer, 0 represents success and other values are error codes representing the cause of failure; when the return value is a pointer, NULL represents failure.
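+
+With the header included and the dynamic library linked (typically `gcc check.c -o check -ltaos` on Linux; the exact command depends on your environment), a minimal sanity-check program using the basic APIs documented below could look like this sketch; the server address and credentials are illustrative defaults:
+
+```c
+// Driver sanity check: connect, print versions, disconnect (sketch).
+#include <stdio.h>
+#include <taos.h>
+
+int main() {
+  // Connect without selecting a database; NULL indicates failure, as noted above.
+  TAOS *taos = taos_connect("localhost", "root", "taosdata", NULL, 0);
+  if (taos == NULL) {
+    printf("failed to connect to TDengine\n");
+    return 1;
+  }
+  printf("client version: %s\n", taos_get_client_info());
+  printf("server version: %s\n", taos_get_server_info(taos));
+  taos_close(taos);
+  taos_cleanup();
+  return 0;
+}
+```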
+
+For more sample code using the C/C++ connector, please visit https://github.com/taosdata/TDengine/tree/develop/tests/examples/c.
+
+### Basic API
+
+The basic API is used to create database connections and provide a runtime environment for the execution of other APIs.
+
+- `void taos_init()`
+
+Initialize the running environment. If the application does not call this API actively, it will be called automatically when the application calls taos_connect, so an application generally does not need to call it manually.
+
+- `void taos_cleanup()`
+
+Clean up the running environment; call this API before the application exits.
+
+- `int taos_options(TSDB_OPTION option, const void * arg, ...)`
+
+Set client options. Currently only the time zone setting (TSDB_OPTION_TIMEZONE) and encoding setting (TSDB_OPTION_LOCALE) are supported. The time zone and encoding default to the current operating system settings.
+
+- `char *taos_get_client_info()`
+
+Get version information of the client.
+
+- `TAOS *taos_connect(const char *host, const char *user, const char *pass, const char *db, int port)`
+
+Create a database connection and initialize the connection context. The parameters to be provided by the user include:
+
+* host: FQDN of the node used by TDengine to manage the master node
+* user: user name
+* pass: password
+* db: database name. If the user does not provide it, the connection can still be established normally, which means the user can create a new database through this connection; if the user provides a database name, it means the database has already been created and is used by default
+* port: port number
+
+A NULL return value indicates failure. The application needs to save the returned pointer for subsequent API calls.
+
+- `char *taos_get_server_info(TAOS *taos)`
+
+Get version information of the server side.
+
+- `int taos_select_db(TAOS *taos, const char *db)`
+
+Set the current default database to db.
+
+- `void taos_close(TAOS *taos)`
+
+Close the connection, where `taos` is the pointer returned by the `taos_connect` function.
+
+### Synchronous query API
+
+Traditional database operation APIs are all synchronous: after the application calls an API, it remains blocked until the server returns the result. TDengine supports the following synchronous APIs:
+
+- `TAOS_RES* taos_query(TAOS *taos, const char *sql)`
+
+This API is used to execute an SQL statement, which can be a DQL, DML or DDL statement. The `taos` parameter is the pointer obtained through `taos_connect`. Whether execution failed cannot be judged by whether the return value is NULL; instead, use the `taos_errno` function to parse the error code in the result set.
+
+- `int taos_result_precision(TAOS_RES *res)`
+
+Get the precision of the timestamp field in the result set: `0` for milliseconds, `1` for microseconds, and `2` for nanoseconds.
+
+- `TAOS_ROW taos_fetch_row(TAOS_RES *res)`
+
+Get the data in the query result set row by row.
+
+- `int taos_fetch_block(TAOS_RES *res, TAOS_ROW *rows)`
+
+Get the data in the query result set in batches; the return value is the number of rows obtained.
+
+- `int taos_num_fields(TAOS_RES *res)` and `int taos_field_count(TAOS_RES *res)`
+
+These two APIs are equivalent and are used to get the number of columns in the query result set.
+
+- `int* taos_fetch_lengths(TAOS_RES *res)`
+
+Get the length of each field in the result set. The return value is an array whose length is the number of columns in the result set.
+
+- `int taos_affected_rows(TAOS_RES *res)`
+
+Get the number of rows affected by the executed SQL statement.
+
+- `TAOS_FIELD *taos_fetch_fields(TAOS_RES *res)`
+
+Get the attributes (data type, name, number of bytes) of each column of data in the query result set, which can be used in conjunction with `taos_num_fields` to parse the data of a tuple (one row) returned by `taos_fetch_row`. The structure of `TAOS_FIELD` is as follows:
+
+```c
+typedef struct taosField {
+  char name[65];   // column name
+  uint8_t type;    // data type
+  int16_t bytes;   // number of bytes
+} TAOS_FIELD;
+```
+
+- `void taos_stop_query(TAOS_RES *res)`
+
+Stop the execution of a query.
+
+- `void taos_free_result(TAOS_RES *res)`
+
+Release the query result set and related resources. After a query is completed, be sure to call this API to release resources, otherwise the application may leak memory. Note, however, that after the resources are released, calling functions such as `taos_consume` on the result set will make the application crash.
+
+- `char *taos_errstr(TAOS_RES *res)`
+
+Get the reason why the last API call failed; the return value is a string.
+
+- `int taos_errno(TAOS_RES *res)`
+
+Get the reason why the last API call failed; the return value is the error code.
+
+**Note:** For TDengine version 2.0 and above, it is recommended that each thread of a database application establish an independent connection, or that a connection pool be established based on threads. It is not recommended to share a connection (TAOS\*) structure between different threads in an application. Query and write operations based on the TAOS structure are thread-safe, but state variables such as the "USE statement" may interfere between threads. In addition, the C connector can dynamically establish new database-oriented connections on demand (this process is invisible to the user), and it is recommended to call `taos_close` to close a connection only when the program finally exits.
+
+### Asynchronous query API
+
+In addition to synchronous APIs, TDengine also provides higher-performance asynchronous APIs to handle data insertion and query operations. Under the same software and hardware environment, the asynchronous API processes data insertion 2~4 times faster than the synchronous API. The asynchronous API adopts a non-blocking call mode and returns immediately, before the system actually completes the given database operation. The calling thread can then handle other work, thus improving the performance of the whole application. Asynchronous APIs are particularly advantageous under high network latency.
+
+The asynchronous APIs all require the application to provide a corresponding callback function. The callback function parameters are set as follows: the first two parameters are the same across APIs, and the third parameter depends on the API. The first parameter, param, is provided by the application when it calls the asynchronous API; during the callback, the application can use it to retrieve the context of the specific operation, depending on the implementation. The second parameter is the result set of the SQL operation. If it is NULL, as for an insert operation, it means that no records are returned; if it is not NULL, as for a select operation, it means that records are returned.
+
+Asynchronous APIs place relatively high demands on users, who can use them selectively according to specific application scenarios. Here are two important asynchronous APIs:
+
+- `void taos_query_a(TAOS *taos, const char *sql, void (*fp)(void *param, TAOS_RES *, int code), void *param);`
+  Execute an SQL statement asynchronously.
+
+  * taos: the database connection returned by calling `taos_connect`
+  * sql: the SQL statement to execute
+  * fp: the user-defined callback function, whose third parameter `code` indicates whether the operation succeeded: `0` for success, a negative number for failure (call `taos_errstr` to get the reason for failure). When defining the callback function, it mainly handles the second parameter `TAOS_RES *`, which is the result set returned by the query
+  * param: the parameter passed through to the callback
+
+- `void taos_fetch_rows_a(TAOS_RES *res, void (*fp)(void *param, TAOS_RES *, int numOfRows), void *param);`
+  Get the result set of an asynchronous query in batches; can only be used together with `taos_query_a`. Within:
+
+  * res: the result set returned by the `taos_query_a` callback
+  * fp: the callback function. Its parameter `param` is a user-definable parameter passed through to the callback function; `numOfRows` is the number of rows of data obtained (not of the entire query result set). In the callback function, the application can get each row of the batch by iterating forward with `taos_fetch_row`. After reading all the records in a block, the application needs to continue calling `taos_fetch_rows_a` in the callback function to obtain the next batch of records for processing, until the number of records returned (`numOfRows`) is zero (all results returned) or negative (the query failed).
+
+The asynchronous APIs of TDengine all use a non-blocking calling mode. Applications can use multiple threads to open multiple tables at the same time, and can query or insert into each open table at the same time. It should be pointed out that the **application must ensure that the operations on the same table are completely serialized**; that is, while an insertion or query operation on a table is not completed (has not returned results), a second insertion or query operation on the same table cannot be performed.
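+
+Putting the two APIs together, the following sketch (illustrative; error handling is minimal and the connection is assumed to be established) issues a query asynchronously and consumes the result batches in callbacks:
+
+```c
+// Asynchronous query sketch: taos_query_a + taos_fetch_rows_a.
+#include <stdio.h>
+#include <taos.h>
+
+void fetch_cb(void *param, TAOS_RES *res, int numOfRows) {
+  if (numOfRows > 0) {
+    int num_fields = taos_num_fields(res);
+    TAOS_FIELD *fields = taos_fetch_fields(res);
+    TAOS_ROW row;
+    while ((row = taos_fetch_row(res))) {      // iterate over this batch
+      char line[256];
+      taos_print_row(line, row, fields, num_fields);
+      puts(line);
+    }
+    taos_fetch_rows_a(res, fetch_cb, param);   // request the next batch
+  } else {
+    // numOfRows == 0: all results consumed; negative: the query failed.
+    taos_free_result(res);
+  }
+}
+
+void query_cb(void *param, TAOS_RES *res, int code) {
+  if (code == 0) {
+    taos_fetch_rows_a(res, fetch_cb, param);   // start fetching rows
+  } else {
+    printf("query failed: %s\n", taos_errstr(res));
+    taos_free_result(res);
+  }
+}
+
+void async_query(TAOS *taos) {
+  taos_query_a(taos, "select * from meters where current > 10", query_cb, NULL);
+  // The call returns immediately; results arrive in the callbacks above.
+}
+```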

### Continuous query interface

TDengine provides time-driven, real-time stream computing APIs. At regular intervals, you can perform various real-time aggregation operations on the tables (data streams) of one or more databases. The interface is simple: there is only an API for opening a stream and one for closing it. The details are as follows:

- `TAOS_STREAM *taos_open_stream(TAOS *taos, const char *sql, void (*fp)(void *param, TAOS_RES *, TAOS_ROW row), int64_t stime, void *param, void (*callback)(void *))`

This API is used to create a data stream, where:

  * taos: the established database connection
  * sql: the SQL query statement (query statements only)
  * fp: the user-defined callback function pointer. After each round of stream computing completes, TDengine passes the query result (TAOS_ROW), the query state (TAOS_RES), and the user-defined parameter (param) to the callback. In the callback, the user can use `taos_num_fields` to obtain the number of columns in the result set and `taos_fetch_fields` to obtain the type of each column
  * stime: the time at which stream computing starts. 0 means starting from now; a non-zero value means starting from the specified time (in milliseconds since 1970/1/1 UTC)
  * param: a parameter provided by the application that is passed back to the application on each callback
  * callback: a second callback function, invoked when the continuous query stops automatically

If the return value is NULL, creation failed; if it is not NULL, creation succeeded.

- `void taos_close_stream(TAOS_STREAM *tstr)`

Close the data stream; the parameter is the return value of `taos_open_stream`. When the user stops stream computing, be sure to close the data stream.
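As a sketch (the SQL, table name, and interval are assumptions), a stream that maintains a one-minute average could be opened like this:

```c
#include <stdio.h>
#include <taos.h>

// Called after each round of stream computing with one result row.
static void stream_cb(void *param, TAOS_RES *res, TAOS_ROW row) {
    int ncols = taos_num_fields(res);
    TAOS_FIELD *fields = taos_fetch_fields(res);
    char buf[512];
    taos_print_row(buf, row, fields, ncols); // format one row as text
    printf("stream result: %s\n", buf);
}

// With an established connection `taos`:
//   TAOS_STREAM *stream = taos_open_stream(
//       taos, "select avg(val) from test.tb interval(1m)",
//       stream_cb, 0 /* start from now */, NULL, NULL);
//   ...
//   taos_close_stream(stream);
```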

### Data subscription interface

The subscription API currently supports subscribing to one or more tables and continuously obtaining the latest data written to them through periodic polling.

- `TAOS_SUB *taos_subscribe(TAOS* taos, int restart, const char* topic, const char *sql, TAOS_SUBSCRIBE_CALLBACK fp, void *param, int interval)`

This function starts the subscription service, returning the subscription object on success and NULL on failure. Its parameters are:

  * taos: the established database connection
  * restart: if the subscription already exists, whether to start over or continue the previous subscription
  * topic: the subject (i.e., name) of the subscription, which serves as its unique identifier
  * sql: the subscribed query statement. It can only be a select statement, should only query raw data, and can only query data in ascending time order
  * fp: the callback function invoked when query results are received (its prototype is introduced below). It is only used for asynchronous calls; for synchronous calls, pass NULL
  * param: an additional parameter for the callback function, passed to the callback as-is by the system API without any processing
  * interval: the polling period in milliseconds. In asynchronous calls, the callback is invoked periodically according to this parameter; to avoid affecting system performance, it is not recommended to set it too small. In synchronous calls, if the interval between two calls to `taos_consume` is less than this period, the API blocks until the interval exceeds it

- `typedef void (*TAOS_SUBSCRIBE_CALLBACK)(TAOS_SUB* tsub, TAOS_RES *res, void* param, int code)`

In asynchronous mode, the prototype of the callback function has the following parameters:

  * tsub: the subscription object
  * res: the query result set; note that it may contain no records
  * param: the additional parameter supplied by the client when `taos_subscribe` was called
  * code: the error code

- `TAOS_RES *taos_consume(TAOS_SUB *tsub)`

In synchronous mode, this function is used to fetch the results of a subscription. The application typically places it in a loop. If the interval between two calls to `taos_consume` is less than the polling period of the subscription, the API blocks until the interval exceeds it. If new records have arrived in the database, the API returns them; otherwise it returns an empty result set with no records. If the return value is NULL, a system error occurred. In asynchronous mode, the application should not call this API.

- `void taos_unsubscribe(TAOS_SUB *tsub, int keepProgress)`

Unsubscribe. If the parameter `keepProgress` is not 0, the API keeps the progress information of the subscription, and a subsequent call to `taos_subscribe` can continue from that progress; otherwise, the progress information is deleted and the data can only be read again from the beginning.
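A minimal synchronous-polling sketch (topic, table name, and loop bounds are arbitrary illustrative choices):

```c
#include <taos.h>

// With an established connection `taos`, poll a subscription once per second.
void poll_subscription(TAOS *taos) {
    TAOS_SUB *tsub = taos_subscribe(taos, 1 /* restart */, "my-topic",
                                    "select * from test.tb", NULL, NULL, 1000);
    if (tsub == NULL) return;

    for (int i = 0; i < 10; i++) {           // poll a few times as a demo
        TAOS_RES *res = taos_consume(tsub);  // blocks to honor the 1000 ms period
        if (res == NULL) break;              // system error
        TAOS_ROW row;
        while ((row = taos_fetch_row(res)) != NULL) {
            // ... process newly arrived rows ...
        }
    }
    taos_unsubscribe(tsub, 0 /* discard progress */);
}
```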

## Python Connector

See the [video tutorials](https://www.taosdata.com/blog/2020/11/11/1963.html) for how to use the Python connector.

### Installation preparation

- For application driver installation, please refer to the [steps of installing connector driver](https://www.taosdata.com/en/documentation/connector#driver)
- python 2.7 or >= 3.4 installed
- pip or pip3 installed

### Python client installation

#### Linux

Users can find the connector packages for python2 and python3 in the source code folder src/connector/python (or tar.gz/connector/python) and install them with the `pip` command:

`pip install src/connector/python/linux/python2/`

or

`pip3 install src/connector/python/linux/python3/`

#### Windows

With the Windows TDengine client installed, copy the file "C:\TDengine\driver\taos.dll" to the "C:\Windows\System32" directory and enter the Windows cmd command-line interface:

```cmd
cd C:\TDengine\connector\python
python -m pip install .
```

- If there is no `pip` command on the machine, the user can copy the taos folder under src/connector/python to the application directory and use it directly. For the Windows client, after installing the TDengine Windows client, copy C:\TDengine\driver\taos.dll to the C:\Windows\System32 directory.

### How to use

#### Code sample

- Import the TDengine client module

```python
import taos
```

- Get the connection and cursor objects

```python
conn = taos.connect(host="127.0.0.1", user="root", password="taosdata", config="/etc/taos")
c1 = conn.cursor()
```

- *host* is the IP address of the TDengine server, and *config* is the directory where the client configuration file is located
- Write data

```python
import datetime

# Create a database
c1.execute('create database db')
c1.execute('use db')
# Create a table
c1.execute('create table tb (ts timestamp, temperature int, humidity float)')
# Insert data
start_time = datetime.datetime(2019, 11, 1)
affected_rows = c1.execute('insert into tb values (\'%s\', 0, 0.0)' %start_time)
# Insert data in batch
time_interval = datetime.timedelta(seconds=60)
sqlcmd = ['insert into tb values']
for irow in range(1, 11):
    start_time += time_interval
    sqlcmd.append('(\'%s\', %d, %f)' %(start_time, irow, irow*1.2))
affected_rows = c1.execute(' '.join(sqlcmd))
```

- Query data

```python
c1.execute('select * from tb')
# pull query result
data = c1.fetchall()
# The result is a list, with each row as an element
numOfRows = c1.rowcount
numOfCols = len(c1.description)
for irow in range(numOfRows):
    print("Row%d: ts=%s, temperature=%d, humidity=%f" %(irow, data[irow][0], data[irow][1], data[irow][2]))

# Use the cursor directly as an iterator to pull the query result
c1.execute('select * from tb')
for data in c1:
    print("ts=%s, temperature=%d, humidity=%f" %(data[0], data[1], data[2]))
```

- Create a subscription

```python
# Create a subscription with the topic 'test' and a consumption cycle of 1000 milliseconds
# If the first parameter is True, the subscription is restarted. If it is False and a subscription
# with the topic 'test' was created before, consumption continues from that subscription's progress
# instead of starting over from all the data
sub = conn.subscribe(True, "test", "select * from tb;", 1000)
```

- Consume subscription data

```python
data = sub.consume()
for d in data:
    print(d)
```

- Unsubscribe

```python
sub.close()
```

- Close the connection

```python
c1.close()
conn.close()
```

#### Using nanoseconds in the Python connector

Python does not yet fully support a nanosecond type; see links 1 and 2 below. The current implementation of the Python connector returns a plain integer for a nanosecond value, rather than a datetime type as it does for milliseconds and microseconds, so the developer needs to handle the conversion; we recommend the pandas to_datetime() function. If Python officially supports nanoseconds in the future, TAOS Data may change the interface accordingly, which would mean applications need to change too.

1. https://stackoverflow.com/questions/10611328/parsing-datetime-strings-containing-nanoseconds
2. https://www.python.org/dev/peps/pep-0564/
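For example, an integer nanosecond value returned by the connector can be converted with pandas as follows (a small sketch; the sample value is made up):

```python
import pandas as pd

# Suppose a nanosecond timestamp column is returned by the connector
# as a plain integer such as:
ns_value = 1538548685000000123

# Convert it to a pandas Timestamp with nanosecond resolution.
ts = pd.to_datetime(ns_value, unit='ns')
print(ts)  # 2018-10-03 06:38:05.000000123
```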

#### Helper

Users can view the usage information of the module directly through Python's help, or refer to the sample programs in tests/examples/python. The following are some commonly used classes and methods:

- *TDengineConnection* class

Refer to help(taos.TDengineConnection) in Python. This class corresponds to a connection between the client and TDengine. In multithreaded client scenarios, it is recommended that each thread request its own connection instance; sharing one connection among multiple threads is not recommended.

- *TDengineCursor* class

Refer to help(taos.TDengineCursor) in Python. This class corresponds to the write and query operations performed by the client. In multithreaded client scenarios, a cursor instance must remain exclusive to one thread and cannot be shared across threads, otherwise the returned results will be wrong.

- *connect* method

Used to generate an instance of taos.TDengineConnection.

### Python client code sample

In tests/examples/python, we provide a sample Python program, read_example.py, to guide you in designing your own write and query programs. After installing the corresponding client, import the taos module via `import taos`. The steps are as follows:

- Get a `TDengineConnection` object through `taos.connect`; a program needs only one such object, which can be shared among multiple threads.

- Get a new cursor object through the `.cursor()` method of the `TDengineConnection` object; this object must be kept exclusive to each thread.

- Execute SQL statements for writing or querying through the `execute()` method of the cursor object.

- If a write statement is executed, `execute` returns the number of successfully written rows (affected rows).

- If a query statement is executed, the result set needs to be fetched through the fetchall method after the execution succeeds.

  You can refer to the sample code for the specific methods.

## RESTful Connector

To support development on all kinds of platforms, TDengine provides an API that conforms to REST design standards, i.e. a RESTful API. To minimize the learning cost, and differently from other database RESTful API designs, TDengine operates on the database directly through SQL statements contained in the BODY of an HTTP POST request; only a URL is needed. See the [video tutorial](https://www.taosdata.com/blog/2020/11/11/1965.html) for how to use the RESTful connector.

### HTTP request format

```
http://<ip>:<PORT>/rest/sql
```

Parameter description:

- IP: any host in the cluster
- PORT: the httpPort configuration item in the configuration file, defaulting to 6041

For example: http://192.168.0.1:6041/rest/sql is a URL pointing to the node at IP address 192.168.0.1.

The header of the HTTP request needs to carry identity authentication information. TDengine supports Basic authentication and custom authentication; subsequent versions will provide a standard, secure digital-signature mechanism for identity authentication.

- Custom identity authentication information is as follows (the token is introduced later)

```
Authorization: Taosd <TOKEN>
```

- Basic identity authentication information is as follows

```
Authorization: Basic <TOKEN>
```

The BODY of the HTTP request is a complete SQL statement. The data table in the SQL statement should carry a database prefix, e.g. `db_name.tb_name`. If the table name does not have a database prefix, the system returns an error, because the HTTP module is only a simple forwarder and has no notion of a current DB.

Use curl to initiate an HTTP request with custom authentication as follows:

```bash
curl -H 'Authorization: Basic <TOKEN>' -d '<SQL>' <ip>:<PORT>/rest/sql
```

or

```bash
curl -u username:password -d '<SQL>' <ip>:<PORT>/rest/sql
```

Where `TOKEN` is the string `{username}:{password}` encoded in Base64; for example, `root:taosdata` is encoded as `cm9vdDp0YW9zZGF0YQ==`.
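For instance, on a system with the standard `base64` utility, the token can be produced like this:

```bash
# Base64-encode {username}:{password} to obtain the token
echo -n 'root:taosdata' | base64
# Output: cm9vdDp0YW9zZGF0YQ==
```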

### HTTP return format

The return value is in JSON format, as follows:

```json
{
    "status": "succ",
    "head": ["ts","current", …],
    "column_meta": [["ts",9,8],["current",6,4], …],
    "data": [
        ["2018-10-03 14:38:05.000", 10.3, …],
        ["2018-10-03 14:38:15.000", 12.6, …]
    ],
    "rows": 2
}
```

Description:

- status: indicates whether the operation result is success or failure.
- head: the definition of the table; if no result set is returned, there is only one column, "affected_rows". (Starting from version 2.0.17, it is recommended not to rely on the head return value to judge the data column types, but to use column_meta instead. In future versions, head may be removed from the return value.)
- column_meta: added to the return value from version 2.0.17 onward to indicate the data type of each column in data. Each column is described by three values: column name, column type, and type length. For example, ["current", 6, 4] means that the column name is "current", the column type is 6, i.e. float, and the type length is 4, corresponding to a float represented by 4 bytes. If the column type is binary or nchar, the type length indicates the maximum content length the column can store, not the actual data length in this return value. For nchar, the type length indicates the number of Unicode characters that can be stored, not bytes.
- data: the returned data, row by row; if no result set is returned, only [[affected_rows]]. The order of the data columns in each row of data is exactly the same as the order described in column_meta.
- rows: the total number of rows of data.

Column types in column_meta:

* 1: BOOL
* 2: TINYINT
* 3: SMALLINT
* 4: INT
* 5: BIGINT
* 6: FLOAT
* 7: DOUBLE
* 8: BINARY
* 9: TIMESTAMP
* 10: NCHAR

### Custom authorization code

The HTTP request requires the authorization code `<TOKEN>` for identification. Authorization codes are usually provided by the administrator and can be obtained simply by sending an HTTP GET request as follows:

```bash
curl http://<ip>:6041/rest/login/<username>/<password>
```

Where `ip` is the IP address of the TDengine database, `username` is the database username, and `password` is the database password; the return value is in JSON format.
The meanings of its fields are as follows:

- status: flag bit of the request result
- code: code of the return value
- desc: the authorization code

Sample request to get an authorization code:

```bash
curl http://192.168.0.1:6041/rest/login/root/taosdata
```

Return value:

```json
{
  "status": "succ",
  "code": 0,
  "desc": "/KfeAzX/f9na8qdtNZmtONryp201ma04bEl8LcvLUd7a8qdtNZmtONryp201ma04"
}
```

### Use cases

- Query all records of table d1001 in the demo database:

```bash
curl -H 'Authorization: Basic cm9vdDp0YW9zZGF0YQ==' -d 'select * from demo.d1001' 192.168.0.1:6041/rest/sql
```

Return value:

```json
{
    "status": "succ",
    "head": ["ts","current","voltage","phase"],
    "column_meta": [["ts",9,8],["current",6,4],["voltage",4,4],["phase",6,4]],
    "data": [
        ["2018-10-03 14:38:05.000",10.3,219,0.31],
        ["2018-10-03 14:38:15.000",12.6,218,0.33]
    ],
    "rows": 2
}
```

- Create a database demo:

```bash
curl -H 'Authorization: Basic cm9vdDp0YW9zZGF0YQ==' -d 'create database demo' 192.168.0.1:6041/rest/sql
```

Return value:

```json
{
    "status": "succ",
    "head": ["affected_rows"],
    "column_meta": [["affected_rows",4,4]],
    "data": [[1]],
    "rows": 1
}
```

### Other use cases

#### Result set in Unix timestamps

When `sqlt` is used in place of `sql` in the HTTP request URL, the timestamps in the returned result set are expressed in Unix timestamp format, for example:

```bash
curl -H 'Authorization: Basic cm9vdDp0YW9zZGF0YQ==' -d 'select * from demo.d1001' 192.168.0.1:6041/rest/sqlt
```

Return value:

```json
{
    "status": "succ",
    "head": ["ts","current","voltage","phase"],
    "column_meta": [["ts",9,8],["current",6,4],["voltage",4,4],["phase",6,4]],
    "data": [
        [1538548685000,10.3,219,0.31],
        [1538548695000,12.6,218,0.33]
    ],
    "rows": 2
}
```

#### Result set in UTC time strings

When `sqlutc` is used in place of `sql` in the HTTP request URL, the timestamps in the returned result set are represented as UTC time strings, for example:

```bash
curl -H 'Authorization: Basic cm9vdDp0YW9zZGF0YQ==' -d 'select * from demo.t1' 192.168.0.1:6041/rest/sqlutc
```

Return value:

```json
{
    "status": "succ",
    "head": ["ts","current","voltage","phase"],
    "column_meta": [["ts",9,8],["current",6,4],["voltage",4,4],["phase",6,4]],
    "data": [
        ["2018-10-03T14:38:05.000+0800",10.3,219,0.31],
        ["2018-10-03T14:38:15.000+0800",12.6,218,0.33]
    ],
    "rows": 2
}
```

### Important configuration options

Only the configuration parameters related to the RESTful interface are listed below; please refer to the comments in the configuration file for the other system parameters. Note: after a configuration change, the taosd service must be restarted for the change to take effect.

- httpPort: the port number for the external RESTful service, bound to 6041 by default
- httpMaxThreads: the number of threads to start, 2 by default (starting from version 2.0.17, the default value changes to half the number of CPU cores, rounded down)
- restfulRowLimit: the maximum number of result rows to return (in JSON format), 10240 by default
- httpEnableCompress: whether to support compression, disabled by default; currently TDengine only supports the gzip compression format
- httpDebugFlag: logging switch; 131: errors and alarms only, 135: debugging information, 143: very detailed debugging information; default 131

## CSharp Connector

The C# connector supports: Linux 64/Windows x64/Windows x86.

### Installation preparation

- For application driver installation, please refer to the [steps of installing connector driver](https://www.taosdata.com/en/documentation/connector#driver).
- The .NET interface file TDengineDriver.cs and the reference sample TDengineTest.cs are both located in the Windows client install_directory/examples/C# directory.
- On Windows, C# applications can use the native C interface of TDengine to perform all database operations; future versions will provide an ORM (Dapper) framework driver.

### Installation verification

Run install_directory/examples/C#/C#Checker/C#Checker.exe:

```cmd
cd {install_directory}/examples/C#/C#Checker
csc /optimize *.cs
C#Checker.exe -h
```

### How to use the C# connector

On Windows, .NET applications can use the .NET interface of TDengine to perform all database operations. The steps are as follows:

1. Add the .NET interface file TDengineDriver.cs to the .NET project of the application.
2. Refer to TDengineTest.cs for how to define database connection parameters and how to perform data insertion, queries, and other operations.

The .NET interface requires the taos.dll file, so before executing the application, copy the taos.dll file from the Windows client install_directory/driver directory to the folder where the .NET project generates its .exe executable. After running the .exe file, you can access the TDengine database and perform operations such as insert and query.

**Note:**

1. TDengine v2.0.3.0 supports both 32-bit and 64-bit Windows systems, so when a .NET project generates an .exe file, please select the corresponding "x86" or "x64" for "Platform" under "Solution"/"Project".
2. The .NET interface has been verified with Visual Studio 2015/2017; other VS versions have yet to be verified.

### Third-party driver

Maikebing.Data.Taos is an ADO.NET provider for TDengine that supports Linux and Windows. This development package is provided by the enthusiastic contributor 麦壳饼@maikebing. For more details:

```
// Download
https://github.com/maikebing/Maikebing.EntityFrameworkCore.Taos
// How to use
https://www.taosdata.com/blog/2020/11/02/1901.html
```

## Go Connector

### Installation preparation

- For application driver installation, please refer to the [steps of installing connector driver](https://www.taosdata.com/en/documentation/connector#driver).

TDengine provides the Go driver taosSql, which implements the Go language's built-in database/sql/driver interface. Users can access TDengine from an application simply by importing the package, as follows; see https://github.com/taosdata/driver-go/blob/develop/taosSql/driver_test.go for details.

Sample code using the Go connector can be found in https://github.com/taosdata/TDengine/tree/develop/tests/examples/go and in the [video tutorial](https://www.taosdata.com/blog/2020/11/11/1951.html).

```Go
import (
    "database/sql"
    _ "github.com/taosdata/driver-go/taosSql"
)
```

**It is recommended to use Go version 1.13 or above and to turn on module support:**

```bash
go env -w GO111MODULE=on
go env -w GOPROXY=https://goproxy.io,direct
```

### Common APIs

- `sql.Open(DRIVER_NAME string, dataSourceName string) *DB`

This API opens a database handle and returns an object of type `*DB`. Generally, DRIVER_NAME is set to the string `taosSql`, and dataSourceName is set to a string of the form `user:password@/tcp(host:port)/dbname`. If the client wants to access TDengine concurrently from multiple goroutines, it needs to create an `sql.Open` object in each goroutine and use it to access TDengine.

**Note**: When this API is called successfully, no permission check is performed; only when Query or Exec is actually executed is the connection truly created and the user/password/host/port checked for validity. In addition, because most of the driver's implementation resides in libtaos, on which taosSql depends, `sql.Open` itself is particularly lightweight.

- `func (db *DB) Exec(query string, args ...interface{}) (Result, error)`

A built-in method of the object returned by `sql.Open`, used to execute non-query SQL.

- `func (db *DB) Query(query string, args ...interface{}) (*Rows, error)`

A built-in method of the object returned by `sql.Open`, used to execute query statements.

- `func (db *DB) Prepare(query string) (*Stmt, error)`

A built-in method of the object returned by `sql.Open`, used to create a prepared statement for later queries or executions.

- `func (s *Stmt) Exec(args ...interface{}) (Result, error)`

A built-in method that executes a prepared statement with the given arguments and returns a Result summarizing the effect of the statement.

- `func (s *Stmt) Query(args ...interface{}) (*Rows, error)`

A built-in method that executes a prepared query statement with the given arguments and returns the query results as a `*Rows`.

- `func (s *Stmt) Close() error`

A built-in method that closes the statement.
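Putting these APIs together, a minimal usage sketch might look like the following (the table `test.tb (ts timestamp, val int)` and the DSN values are assumptions):

```go
package main

import (
	"database/sql"
	"fmt"
	"log"
	"time"

	_ "github.com/taosdata/driver-go/taosSql" // registers the "taosSql" driver
)

func main() {
	// DSN format: user:password@/tcp(host:port)/dbname
	db, err := sql.Open("taosSql", "root:taosdata@/tcp(127.0.0.1:6030)/test")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// The connection is actually established (and credentials checked) here.
	if _, err = db.Exec("insert into tb values (now, 23)"); err != nil {
		log.Fatal(err)
	}

	rows, err := db.Query("select ts, val from tb")
	if err != nil {
		log.Fatal(err)
	}
	defer rows.Close()
	for rows.Next() {
		var ts time.Time
		var val int
		if err = rows.Scan(&ts, &val); err != nil {
			log.Fatal(err)
		}
		fmt.Println(ts, val)
	}
}
```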

## Node.js Connector

The Node.js connector supports the following systems:

| **CPU Type**         | x64 (64-bit) |         |         | aarch64 | aarch32 |
| -------------------- | ------------ | ------- | ------- | ------- | ------- |
| **OS Type**          | Linux        | Win64   | Win32   | Linux   | Linux   |
| **Supported or Not** | **Yes**      | **Yes** | **Yes** | **Yes** | **Yes** |

See the [video tutorial](https://www.taosdata.com/blog/2020/11/11/1957.html) for how to use the Node.js connector.

### Installation preparation

- For application driver installation, please refer to the [steps of installing connector driver](https://www.taosdata.com/en/documentation/connector#driver).

### Install the Node.js connector

Users can install it through [npm](https://www.npmjs.com/) or from the source code in src/connector/nodejs/. The installation steps are as follows:

First, install the Node.js connector through [npm](https://www.npmjs.com/):

```bash
npm install td2.0-connector
```

We recommend using npm to install the Node.js connector. If you do not have npm installed, you can copy src/connector/nodejs/ to your Node.js project directory.

We use [node-gyp](https://github.com/nodejs/node-gyp) to interact with the TDengine server. Before installing the Node.js connector, you also need to install the following software:

### Linux

- python (v2.7 recommended; v3.x is not currently supported)
- node: connector version 2.0.6 supports Node.js v12.x and v10.x; 2.0.5 and earlier support v10.x; other versions may have package compatibility issues
- make
- [GCC](https://gcc.gnu.org/) or another C compiler

### Windows

#### Solution 1

Use Microsoft [windows-build-tools](https://github.com/felixrieseberg/windows-build-tools) to install all the necessary tools by executing `npm install --global --production windows-build-tools` in the cmd command-line interface.

#### Solution 2

Manually install the following tools:

- Install the Visual Studio related tools: [Visual Studio Build Tools](https://visualstudio.microsoft.com/thank-you-downloading-visual-studio/?sku=BuildTools) or [Visual Studio 2017 Community](https://visualstudio.microsoft.com/pl/thank-you-downloading-visual-studio/?sku=Community)
- Install [Python](https://www.python.org/downloads/) 2.7 (v3.x is not supported) and execute `npm config set python python2.7`
- Open `cmd` and execute `npm config set msvs_version 2017`

If the steps above cannot be performed successfully, you can refer to Microsoft's Node.js user manual: [Microsoft's Node.js Guidelines for Windows](https://github.com/Microsoft/nodejs-guidelines/blob/master/windows-environment.md#compiling-native-addon-modules).

If you use ARM64 Node.js on Windows 10 ARM, you also need to add "Visual C++ compilers and libraries for ARM64" and "Visual C++ ATL for ARM64".

#### Sample

The sample program source code is located in install_directory/examples/nodejs, which contains:

- node-example.js: Node.js sample source code
- node-example-raw.js: Node.js sample source code that uses the cursor directly

### Installation verification

After installing the TDengine client, the nodejsChecker.js program can verify whether the current environment supports access to TDengine via Node.js.

Steps:

1. Create a new installation verification directory, for example ~/tdengine-test, and copy the nodejsChecker.js source program from GitHub (download address: https://github.com/taosdata/TDengine/tree/develop/tests/examples/nodejs/nodejsChecker.js).

2. Execute the following commands:

   ```bash
   npm init -y
   npm install td2.0-connector
   node nodejsChecker.js host=localhost
   ```

3. After performing the above steps, the command line will print the output of nodejsChecker.js connecting to the TDengine instance and performing a simple insert and query.

### How to use Node.js

The following are some basic uses of the Node.js connector. Please refer to [TDengine Node.js connector](http://docs.taosdata.com/node) for details.

### Create a connection

When using the Node.js connector, you must first `require('td2.0-connector')` and then use the `taos.connect` function. The only parameter that must be provided to `taos.connect` is `host`; the other parameters use the following default values if not provided. Finally, a `cursor` needs to be initialized to communicate with the TDengine server:

```javascript
const taos = require('td2.0-connector');
var conn = taos.connect({host:"127.0.0.1", user:"root", password:"taosdata", config:"/etc/taos", port:0})
var cursor = conn.cursor(); // Initializing a new cursor
```

To close the connection:

```javascript
conn.close();
```

#### Execute SQL and insert data

For DDL statements (such as `create database`, `create table`, `use`, and so on), you can use the `execute` method of `cursor`. The code is as follows:

```js
cursor.execute('create database if not exists test;')
```

The above code creates a database named test. For DDL statements, there is generally no return value, and `execute` returns 0.

For insert statements, the code is as follows:

```js
var affectRows = cursor.execute('insert into test.weather values(now, 22.3, 34);')
```

The return value of the execute method is the number of rows affected by the statement. If the SQL above inserts one row of data into the weather table of the test database, the return value affectRows is 1.

TDengine does not currently support update and delete statements.

#### Query

You can query the database through the `cursor.query` function:

```javascript
var query = cursor.query('show databases;')
```

The results of the query can be obtained and printed through the `query.execute()` function:

```javascript
var promise = query.execute();
promise.then(function(result) {
  result.pretty();
});
```

You can also use the `bind` method of `query` to parameterize the query statement: `query` automatically fills each `?` in the statement with the values provided. For example:

```javascript
var query = cursor.query('select * from meterinfo.meters where ts <= ? and areaid = ?;').bind(new Date(), 5);
query.execute().then(function(result) {
  result.pretty();
})
```

If you pass `true` as the second parameter of `cursor.query`, the query results are retrieved immediately, as follows:

```javascript
var promise = cursor.query('select * from meterinfo.meters where v1 = 30;', true)
promise.then(function(result) {
  result.pretty();
})
```

#### Asynchronous functions

Asynchronous database operations are similar to the above; just append `_a` to functions such as `cursor.execute` and `TaosQuery.execute`:

```javascript
var promise1 = cursor.query('select count(*), avg(v1), avg(v2) from meter1;').execute_a()
var promise2 = cursor.query('select count(*), avg(v1), avg(v2) from meter2;').execute_a();
promise1.then(function(result) {
  result.pretty();
})
promise2.then(function(result) {
  result.pretty();
})
```

### Example

[node-example.js](https://github.com/taosdata/TDengine/tree/master/tests/examples/nodejs/node-example.js) provides a code example that uses the Node.js connector to create a table, insert weather data, and query the inserted data.

[node-example-raw.js](https://github.com/taosdata/TDengine/tree/master/tests/examples/nodejs/node-example-raw.js) is also a code example that uses the Node.js connector to create a table, insert weather data, and query the inserted data, but unlike the above, this example only uses the cursor.
diff --git a/documentation20/en/09.connections/docs.md b/documentation20/en/09.connections/docs.md
new file mode 100644
index 0000000000000000000000000000000000000000..e759da31677a5344a0f6578c751c4b77f86a43db
--- /dev/null
+++ b/documentation20/en/09.connections/docs.md
@@ -0,0 +1,157 @@

# Connections with Other Tools

## Grafana

TDengine can quickly integrate with [Grafana](https://www.grafana.com/), an open-source data visualization system, to build a data monitoring and alarm system; the whole process requires no code. The contents of data tables in TDengine can be visually displayed on a dashboard.

### Install Grafana

TDengine currently supports Grafana 5.2.4 and above. You can download and install the package from the Grafana website according to your operating system. The download address is: https://grafana.com/grafana/download.

### Configure Grafana

The TDengine Grafana plugin is in the /usr/local/taos/connector/grafanaplugin directory.

Taking CentOS 7.2 as an example, just copy the grafanaplugin directory to the /var/lib/grafana/plugins directory and restart Grafana.

```bash
sudo cp -rf /usr/local/taos/connector/grafanaplugin /var/lib/grafana/plugins/tdengine
```

### Use Grafana

#### Configure data source

You can log in to the Grafana server (username/password: admin/admin) at localhost:3000 and add a data source through `Configuration -> Data Sources` on the left panel, as shown in the following figure:

![img](page://images/connections/add_datasource1.jpg)

Click `Add data source` to enter the Add Data Source page, enter TDengine in the search box, and select it to add, as shown in the following figure:

![img](page://images/connections/add_datasource2.jpg)

Enter the data source configuration page and modify the configuration according to the default prompts:

![img](page://images/connections/add_datasource3.jpg)

- Host: the IP address of any server in the TDengine cluster and the port number of the TDengine RESTful interface (6041), by default http://localhost:6041
- User: the TDengine username.
- Password: the TDengine user password.

Click `Save & Test`. On success, you will be prompted as follows:

![img](page://images/connections/add_datasource4.jpg)

#### Create a dashboard

Go back to the home page to create a dashboard, and click `Add Query` to enter the panel query page:

![img](page://images/connections/create_dashboard1.jpg)

As shown in the figure above, select the TDengine data source in Query, and enter the corresponding SQL in the query box below. Details are as follows:

- INPUT SQL: enter the statement to query (the result set of the SQL statement should be two columns and multiple rows), for example `select avg(mem_system) from log.dn where ts >= $from and ts < $to interval($interval)`, where `from`, `to`, and `interval` are built-in variables of the TDengine plugin, representing the query range and time interval obtained from the Grafana panel. In addition to the built-in variables, custom template variables are also supported.
- ALIAS BY: you can set an alias for the current query.
- GENERATE SQL: clicking this button automatically replaces the variables and generates the final statement to be executed.

Following the default prompts, querying the average system memory usage of the server where TDengine is deployed at the specified interval looks as follows:

![img](page://images/connections/create_dashboard2.jpg)

> Please refer to the Grafana [documents](https://grafana.com/docs/) for how to use Grafana to create the corresponding monitoring interface and for more about Grafana usage.

#### Import a dashboard

An importable dashboard, `tdengine-grafana.json`, is provided under the Grafana plugin directory /usr/local/taos/connector/grafana/tdengine/dashboard/.

Click the `Import` button on the left panel and upload the `tdengine-grafana.json` file:

![img](page://images/connections/import_dashboard1.jpg)

After the dashboard is imported, you can see it as follows:

![img](page://images/connections/import_dashboard2.jpg)

## MATLAB

MATLAB can connect directly to TDengine via the JDBC driver provided in the installation package and load data into the local workspace.

### JDBC interface adaptation for MATLAB

Several steps are required to adapt MATLAB to TDengine.
Taking the adaptation of MATLAB 2017a on Windows 10 as an example:

- Copy the file JDBCDriver-1.0.0-dist.jar from the TDengine package to the directory ${matlab_root}\MATLAB\R2017a\java\jar\toolbox
- Copy the file taos.lib from the TDengine package to ${matlab_root}\MATLAB\R2017a\lib\win64
- Add the .jar package just copied to the MATLAB classpath by appending the line below at the end of the file ${matlab_root}\MATLAB\R2017a\toolbox\local\classpath.txt:

  ```
  $matlabroot/java/jar/toolbox/JDBCDriver-1.0.0-dist.jar
  ```

- Create a file called javalibrarypath.txt in the directory ${user_home}\AppData\Roaming\MathWorks\MATLAB\R2017a, and add the taos.dll path to the file. For example, if the file taos.dll is in the directory C:\Windows\System32, add the following line to javalibrarypath.txt:

  ```
  C:\Windows\System32
  ```

### Connect to TDengine in MATLAB to get data

After the above configuration succeeds, open MATLAB.

- Create a connection:

```matlab
conn = database('db', 'root', 'taosdata', 'com.taosdata.jdbc.TSDBDriver', 'jdbc:TSDB://127.0.0.1:0/')
```

- Make a query:

```matlab
sql0 = ['select * from tb']
data = select(conn, sql0);
```

- Insert a record:

```matlab
sql1 = ['insert into tb values (now, 1)']
exec(conn, sql1)
```

For more detailed examples, please refer to the examples\Matlab\TDEngineDemo.m file in the package.

## R

The R language supports connecting to the TDengine database through the JDBC interface. First, install the JDBC package for R: launch the R environment and execute the following command to install the JDBC support library:

```R
install.packages('RJDBC', repos='http://cran.us.r-project.org')
```

Once installed, load the RJDBC package by executing the `library('RJDBC')` command.

Then load the TDengine JDBC driver:

```R
drv<-JDBC("com.taosdata.jdbc.TSDBDriver","JDBCDriver-2.0.0-dist.jar", identifier.quote="\"")
```

If this succeeds, no error message is displayed. Then use the following command to try a database connection:

```R
conn<-dbConnect(drv,"jdbc:TSDB://192.168.0.1:0/?user=root&password=taosdata","root","taosdata")
```

Please replace the IP address in the command above with the correct one. If no error message is shown, the connection has been established successfully; otherwise, adjust the connection command according to the error prompt. TDengine supports the following functions of the *RJDBC* package:

- `dbWriteTable(conn, "test", iris, overwrite=FALSE, append=TRUE)`: write the data in the data frame iris to the table test in the TDengine server. The parameter overwrite must be FALSE, append must be TRUE, and the schema of the data frame iris must be the same as that of the table test.
- `dbGetQuery(conn, "select count(*) from test")`: run a query command
- `dbSendUpdate(conn, "use db")`: execute any non-query SQL statement, for example `dbSendUpdate(conn, "use db")` or, to write data, `dbSendUpdate(conn, "insert into t1 values (now, 99)")`, and the like.
- `dbReadTable(conn, "test")`: read all the data in table test
- `dbDisconnect(conn)`: close a connection
- `dbRemoveTable(conn, "test")`: remove the table test

The following functions are not currently supported:

- `dbExistsTable(conn, "test")`: check whether the table test exists
- `dbListTables(conn)`: list all tables in the connection
\ No newline at end of file
diff --git a/documentation20/en/10.cluster/docs.md b/documentation20/en/10.cluster/docs.md
new file mode 100644
index 0000000000000000000000000000000000000000..d7d908ff424270d9aa33f89eefd36e73f6ab68b2
--- /dev/null
+++ b/documentation20/en/10.cluster/docs.md
@@ -0,0 +1,235 @@

# TDengine Cluster Management

Multiple TDengine servers, that is, multiple running instances of taosd, can form a cluster, ensuring highly reliable operation of TDengine and providing scale-out capability. To understand cluster management in TDengine 2.0, you need to understand the basic concepts of clustering; please refer to the chapter "Overall Architecture of TDengine 2.0". Before deploying a cluster, please first follow the chapter ["Getting started"](https://www.taosdata.com/en/documentation/getting-started/) to install and experience the single-node functionality.

Each data node of the cluster is uniquely identified by its End Point, which is composed of an FQDN (Fully Qualified Domain Name) plus a port, such as h1.taosdata.com:6030. In general, the FQDN is the server's hostname, which can be obtained through the Linux command `hostname -f` (for how to configure an FQDN, please refer to [All about FQDN of TDengine](https://www.taosdata.com/blog/2020/09/11/1824.html)). The port is the external service port number of the data node; the default is 6030, but it can be modified by configuring the parameter serverPort in taos.cfg. A physical node may be configured with multiple hostnames; TDengine automatically picks the first one, but it can also be specified through the configuration parameter fqdn in taos.cfg. If you are accustomed to direct IP address access, you can set the parameter fqdn to the IP address of the node.

Cluster management in TDengine is extremely simple: apart from manually adding and deleting nodes, all other tasks are completed automatically, minimizing the operational workload. This chapter describes cluster management operations in detail.

Please refer to the [video tutorial](https://www.taosdata.com/blog/2020/11/11/1961.html) for cluster building.

## Preparation

**Step 0:** Plan the FQDNs of all physical nodes in the cluster; add each planned FQDN to /etc/hostname of its physical node; modify /etc/hosts of every physical node to add the IP-to-FQDN mapping of all cluster physical nodes. [If DNS is deployed, contact your network administrator to configure it on the DNS server.]

**Step 1:** If the physical nodes carry previous test data, or have had version 1.x or another version of TDengine installed, delete it first and drop all of the data.
For the specific steps, please refer to the blog "[Installation and Uninstallation of Various Packages of TDengine](https://www.taosdata.com/blog/2019/08/09/566.html)".

**Note 1:** Because FQDN information is written to a file, if the FQDN has not been configured or has been changed and TDengine has already been started, be sure to clean up the previous data (`rm -rf /var/lib/taos/*`), on the premise that the data is useless or has been backed up;

**Note 2:** The client also needs to be able to correctly resolve the FQDN of each node, whether through a DNS service or the hosts file.

**Step 2:** It is recommended to close the firewall on all physical nodes, or at least ensure that TCP and UDP ports 6030-6042 are open. It is **strongly recommended** to close the firewall first and configure the ports after the cluster has been built;

**Step 3:** Install TDengine on all physical nodes; the versions must be identical, **but do not start taosd yet**. During installation, when prompted whether to join an existing TDengine cluster, simply press Enter on the first physical node to create a new cluster; on each subsequent physical node, enter the FQDN:port (default 6030) of any physical node already online in the cluster;

**Step 4:** Check the network settings of all data nodes and of the physical nodes where the applications run:

1. Execute the command `hostname -f` on each physical node and confirm that all hostnames are unique (the node where the application driver is located does not need this check).
2. Execute `ping host` on each physical node, where host is the hostname of another physical node, and check whether the other physical nodes are reachable; if not, check the network settings, the /etc/hosts file (on Windows systems the default path is C:\Windows\system32\drivers\etc\hosts), or the DNS configuration. If ping fails, the cluster cannot be built.
3. From the physical node where the application runs, ping the data nodes where taosd runs. If the ping fails, the application cannot connect to taosd; check the DNS settings or the hosts file of the physical node where the application is located;
4. The End Point of each data node is its output hostname plus the port number, for example h1.taosdata.com:6030.

**Step 5:** Modify the TDengine configuration file (the file /etc/taos/taos.cfg needs to be modified on every node). Assume that the first data node to be started has the End Point h1.taosdata.com:6030; its cluster-related parameters are as follows:

```
// firstEp is the first data node connected after each data node's first launch
firstEp               h1.taosdata.com:6030
// Must be configured as the FQDN of this data node. If this machine has only one hostname, you can comment this out
fqdn                  h1.taosdata.com
// Configure the port number of this data node, default 6030
serverPort            6030
// For the application scenario, please refer to the section "How to Use Arbitrator"
arbitrator            ha.taosdata.com:6042
```

The parameters that must be modified are firstEp and fqdn. On every data node, firstEp must be configured to the same value, **but fqdn must be configured to the FQDN of the data node where it resides**. The other parameters need not be modified unless you have a clear reason.

**For a data node (dnode) to be added to the cluster, the 11 cluster-related parameters in the following table must be exactly the same as those of the cluster; otherwise it cannot join.**

| **#** | **Configuration Parameter Name** | **Description**                                                 |
| ----- | -------------------------------- | --------------------------------------------------------------- |
| 1     | numOfMnodes                      | Number of management nodes in the system                         |
| 2     | mnodeEqualVnodeNum               | The number of vnodes an mnode is counted as consuming            |
| 3     | offlineThreshold                 | Offline threshold for judging whether a dnode is offline         |
| 4     | statusInterval                   | The interval at which a dnode reports its status to mnode        |
| 5     | arbitrator                       | The End Point of the arbitrator in the system                    |
| 6     | timezone                         | Time zone                                                        |
| 7     | locale                           | Locale information and encoding format of the system             |
| 8     | charset                          | Character set encoding                                           |
| 9     | balance                          | Whether to enable load balancing                                 |
| 10    | maxTablesPerVnode                | The maximum number of tables that can be created in each vnode   |
| 11    | maxVgroupsPerDb                  | The maximum number of vgroups that can be used per DB            |

## Launch the First Data Node

Following the instructions in "[Getting started](https://www.taosdata.com/en/documentation/getting-started/)", launch the first data node, such as h1.taosdata.com, then execute taos to start the TDengine shell and run the command `show dnodes` from the shell, as follows:

```
Welcome to the TDengine shell from Linux, Client Version:2.0.0.0
Copyright (c) 2017 by TAOS Data, Inc. All rights reserved.

taos> show dnodes;
 id |      end_point      | vnodes | cores | status | role |       create_time       |
=====================================================================================
  1 |  h1.taos.com:6030   |      0 |     2 |  ready |  any | 2020-07-31 03:49:29.202 |
Query OK, 1 row(s) in set (0.006385s)

taos>
```

In the output above, you can see that the End Point of the newly launched data node is h1.taos.com:6030, which is the firstEp of the new cluster.

## Launch Subsequent Data Nodes

To add subsequent data nodes to the existing cluster, follow these steps:

1. Start taosd on each physical node according to the chapter "[Getting started](https://www.taosdata.com/en/documentation/getting-started/)";

2. On the first data node, use the CLI program taos to log in to TDengine and execute the command:

   ```
   CREATE DNODE "h2.taos.com:6030";
   ```

   This adds the End Point of the new data node (learned in Step 4 of the preparation) to the cluster's EP list. **"fqdn:port" must be enclosed in double quotation marks**, otherwise an error occurs. Note that h2.taos.com:6030 in the example should be replaced with the End Point of this new data node;

3. Then execute the command

   ```
   SHOW DNODES;
   ```

   to check whether the new node has joined successfully. If the added data node is offline, check:

   - whether taosd on this data node is working properly; if it is not, first determine the reason;
   - the first few lines of the data node's taosd log file taosdlog.0 (usually in the /var/log/taos directory) to see whether the FQDN and port number output in the log match the End Point just added. If not, add the correct End Point.

Following the above steps, new data nodes can be added to the cluster one by one.

**Tips**:

- Any data node that is online in the cluster can serve as the firstEp of nodes joining later.
- firstEp only takes effect when a data node joins the cluster for the first time. After joining, the data node saves the latest End Point list of the mnodes and no longer relies on this parameter.
- Two data nodes that are not configured with the firstEp parameter will each run independently after startup. At that point, one data node cannot be joined to the other to form a cluster, and **you cannot merge two independent clusters into a new cluster**.

## Data Node Management

The above describes how to build a cluster from scratch. After the cluster is formed, new data nodes can be added at any time for expansion, data nodes can be removed, and the current state of the cluster can be inspected.

### Add data nodes

Execute the CLI program taos, log in to the system using the root account, and execute:

```
CREATE DNODE "fqdn:port";
```

This adds the End Point of the new data node to the cluster's EP list. **"fqdn:port" must be enclosed in double quotation marks**, otherwise an error occurs. The fqdn and port of a data node's external service can be configured through the configuration file taos.cfg; by default they are obtained automatically. [It is strongly recommended not to rely on automatic FQDN acquisition, which may cause the End Point generated for the data node to differ from what is expected.]

### Delete data nodes

Execute the CLI program taos, log in to the TDengine system using the root account, and execute:

```
DROP DNODE "fqdn:port";
```

Where fqdn is the FQDN of the node to delete and port is its external service port number.

**[Note]**

- Once a data node is dropped, it cannot rejoin the cluster. A dropped node must be redeployed (with its data folder emptied) before it can join again. The cluster migrates the data off a dnode before the drop dnode operation completes.
- Note that dropping a dnode and stopping its taosd process are two different concepts; do not confuse them. Because data migration must be performed before a dnode is deleted, the dnode being deleted must remain online; its taosd process may only be stopped after the delete operation has completed.
- After a data node is dropped, the other nodes become aware that its dnodeID has been deleted, and no node in any cluster will accept requests from that dnodeID.
- dnodeIDs are assigned automatically by the cluster and cannot be specified manually. They increase monotonically when generated and are never reused.

### View data nodes

Execute the CLI program taos, log in to the TDengine system using the root account, and execute:

```
SHOW DNODES;
```

This lists all dnodes in the cluster, and for each dnode its fqdn:port, status (ready, offline, etc.), number of vnodes, and number of unused vnodes. You can use this command to check after adding or deleting a data node.

### View virtual node groups

To make full use of multi-core hardware and provide scalability, data needs to be processed in partitions. Therefore, TDengine splits the data of a DB into multiple parts and stores them in multiple vnodes. These vnodes may be distributed across multiple dnodes, thereby achieving scale-out. A vnode belongs to exactly one DB, but a DB can have multiple vnodes. vnodes are allocated automatically by mnode according to the current system resources, without any manual intervention.

Execute the CLI program taos, log in to the TDengine system using the root account, and execute:

```
SHOW VGROUPS;
```

## High Availability of vnode

TDengine provides high availability of the system through a multi-replica mechanism, covering both vnodes and mnodes.

The number of vnode replicas is associated with a DB. There can be multiple DBs in a cluster, and each DB can be configured with its own number of replicas according to operational requirements. When creating a database, specify the number of replicas with the parameter replica (the default is 1). With a single replica, the reliability of the system cannot be guaranteed: as soon as the node holding the data goes down, the service becomes unavailable. The number of nodes in the cluster must be greater than or equal to the number of replicas; otherwise the error "more dnodes are needed" is returned when creating a table. For example, the following command creates a database demo with 3 replicas:

```
CREATE DATABASE demo replica 3;
```

The data in a DB is partitioned and split across multiple vnode groups. The number of vnodes in a vnode group equals the DB's number of replicas, and the data of the vnodes in the same vnode group is completely identical. To ensure high availability, the vnodes of a vnode group must be distributed across different dnodes (in an actual deployment, they need to be on different physical machines). As long as more than half of the vnodes in a vgroup are working, the vgroup can serve normally.

A dnode may hold data from multiple DBs, so when a dnode goes offline, multiple DBs may be affected. If half or more of the vnodes in a vnode group are not working, that vnode group cannot serve externally and cannot insert or read data, which affects the read and write operations of some tables in the DB to which it belongs.

Because of the introduction of vnodes, it is impossible to simply conclude that "if more than half of the dnodes in the cluster are working, the cluster is working". But for simple cases, the judgment is easy. For example, with 3 replicas and only 3 dnodes, the whole cluster can still work normally if one node is down, but it cannot work normally if two data nodes are down.

## High Availability of mnode

A TDengine cluster is managed by mnode (a module of taosd, the management node). To ensure the high availability of mnode, multiple mnode replicas can be configured. The number of replicas is determined by the system configuration parameter numOfMnodes, and the valid range is 1-3. To guarantee the strong consistency of metadata, mnode replicas replicate synchronously.

A cluster has multiple dnodes, but a dnode runs at most one mnode instance. Given multiple dnodes, which ones serve as mnodes? This is decided automatically by the system based on overall resource usage. The user can execute the following command in the TDengine console via the CLI program taos:

```
SHOW MNODES;
```

This displays the mnode list, which shows the End Point of the dnode hosting each mnode and its role (master, slave, unsynced, or offline). When the first data node in the cluster starts, that data node must run an mnode instance; otherwise the dnode cannot work, because a system must have at least one mnode.
If numOfMnodes is configured to 2, an mnode instance will also run on the second dnode when it starts.
+
+To ensure the high availability of the mnode service, numOfMnodes must be set to 2 or greater. Because the metadata saved by the mnodes must be strongly consistent, if numOfMnodes is greater than 2 the replication parameter quorum is automatically set to 2; that is, at least two replicas must acknowledge a write before the client application is notified that the write succeeded.
+
+**Note:** For a highly available TDengine system, both vnodes and mnodes must be configured with multiple replicas.
+
+## Load Balancing
+
+Load balancing is triggered in three situations, none of which requires manual intervention:
+
+- When a new data node is added to the cluster, the system automatically triggers load balancing, and data on some nodes is automatically migrated to the new data node.
+- When a data node is removed from the cluster, the system automatically migrates the data on that node to other data nodes.
+- If a data node is overloaded (holding too much data), the system automatically balances the load by migrating some of its vnodes to other nodes.
+
+In each of these situations, the system computes the load of every data node to decide how to migrate.
+
+**[Tip] Load balancing is controlled by the parameter balance, which determines whether automatic load balancing is enabled.**
+
+## Offline Processing of Data Nodes
+
+If a data node goes offline, the TDengine cluster will detect it automatically. There are two cases:
+
+- If the data node stays offline longer than a certain period (controlled by the configuration parameter offlineThreshold in taos.cfg), the system automatically deletes the data node, generates a system alarm and triggers the load balancing process. If the deleted data node comes online again, it cannot rejoin the cluster; the system administrator needs to add it to the cluster again.
+- If the data node comes back online within the offlineThreshold duration, the system automatically starts the data recovery process. After the data is fully recovered, the node resumes normal work.
+
+**Note:** If all the data nodes of a virtual node group (including the mnode group) are offline or in unsynced state, the master of that virtual node group can only be elected, and the group can only serve externally, after all its data nodes come back online and can exchange status information. For example, suppose the whole cluster has 3 data nodes with 3 replicas. If all 3 data nodes go down and then only 2 restart, the cluster will not work; only when all 3 data nodes have restarted successfully can it serve externally again.
+
+## How to Use Arbitrator
+
+If the number of replicas is even, it is impossible to elect a master for a vnode group when half of its vnodes are not working. Similarly, when half of the mnodes are not working, the master mnode cannot be elected because of the "split brain" problem. To solve this problem, TDengine introduces the concept of an Arbitrator. The Arbitrator simulates a working vnode or mnode but only participates in networking; it does not handle any data insertion or access. As long as more than half of the vnodes or mnodes, including the Arbitrator, are working, the vnode group or mnode group can provide data insertion and query services normally.
For example, with 2 replicas, if node A is offline but node B is still online and can connect to the Arbitrator, then node B can continue to work normally.
+
+In short, in the current version TDengine recommends configuring an Arbitrator in two-replica environments to improve availability.
+
+The name of the Arbitrator executable is tarbitrator. It has almost no system resource requirements; any Linux server with a working network connection can run it. The installation and configuration steps are briefly as follows:
+
+1. Click [Package Download](https://www.taosdata.com/cn/all-downloads/), and in the TDengine Arbitrator Linux section, select the appropriate version to download and install.
+2. The command line parameter -p of this application specifies the port number of its external service; the default is 6042.
+3. Modify the configuration file of each taosd instance, setting the parameter arbitrator in taos.cfg to the End Point of the tarbitrator. (If this parameter is configured, the system will automatically connect to the configured Arbitrator when the number of replicas is even. If the number of replicas is odd, the system will not establish a connection even if an Arbitrator is configured.)
+4. The Arbitrator configured in the configuration file will appear in the output of the `SHOW DNODES` instruction; the value of its role column will be "arb".
+
diff --git a/documentation20/en/11.administrator/docs.md b/documentation20/en/11.administrator/docs.md
new file mode 100644
index 0000000000000000000000000000000000000000..3817a41766d515d663661fd4382c883e0d8f179b
--- /dev/null
+++ b/documentation20/en/11.administrator/docs.md
@@ -0,0 +1,496 @@
+# TDengine Operation and Maintenance
+
+## Capacity Planning
+
+When using TDengine to build an IoT big data platform, computing and storage resources need to be planned according to the business scenario. The following discusses the memory, CPU and disk space required for the system to run.
+
+### Memory requirements
+
+Each DB can create a fixed number of vgroups, which by default equals the number of CPU cores and can be configured with maxVgroupsPerDb; each replica in a vgroup is a vnode; each vnode takes up a fixed amount of memory (the size is related to the database's configuration parameters blocks and cache); each table takes up memory proportional to the total length of its tags; in addition, the system has some fixed memory overhead. Therefore, the system memory required by each DB can be calculated with the following formula:
+
+```
+Database Memory Size = maxVgroupsPerDb * (blocks * cache + 10MB) + numOfTables * (tagSizePerTable + 0.5KB)
+```
+
+Example: assuming a 4-core machine, cache at the default size of 16M, blocks at the default value of 6, 100,000 tables, and a total tag length of 256 bytes, the total memory requirement is: 4 * (16 * 6 + 10) + 100,000 * (0.25 + 0.5) / 1000 = 499M.
+
+A real system often stores data in different DBs according to the characteristics of the data. All of these DBs must be considered when planning.
+
+If there is plenty of memory, the blocks configuration can be increased so that more data is kept in memory and queries become faster.
+
+### CPU requirements
+
+CPU requirements depend on the following two aspects:
+
+- **Data insertion** A single TDengine core can handle at least 10,000 insertion requests per second.
Each insertion request can carry multiple records, and inserting one record at a time consumes almost the same computing resources as inserting 10 records at once. Therefore, the more records per insert, the higher the insertion efficiency. With more than 200 records per insert request, a single core can insert 1 million records per second. However, higher insertion speed places higher demands on front-end data collection, because records need to be cached and then inserted in batches.
+- **Query requirements** TDengine provides efficient queries, but the queries and their frequency vary greatly from scenario to scenario, making it difficult to give objective figures. Users need to write some query statements for their own scenario to determine the required resources.
+
+Therefore, CPU requirements can only be estimated for data insertion; the computing resources consumed by queries cannot be estimated as precisely. In actual operation, it is not recommended to let CPU utilization exceed 50%; beyond that, new nodes should be added to bring in more computing resources.
+
+### Storage requirements
+
+Compared with general-purpose databases, TDengine has an ultra-high compression ratio. In most scenarios, the compression ratio of TDengine is at least 5:1, and in some scenarios it may exceed 10:1, depending on the actual data characteristics. The raw data size before compression can be calculated as follows:
+
+```
+Raw DataSize = numOfTables * rowSizePerTable * rowsPerTable
+```
+
+Example: for 10 million smart meters, each collecting data every 15 minutes with 128 bytes per collection, the raw data volume for one year is: 10,000,000 * 128 * 24 * 60 / 15 * 365 = 44.8512 TB. TDengine then consumes approximately 44.8512 / 5 = 8.97024 TB.
+
+Users can set the maximum retention time of data on disk through the parameter `keep`. To further reduce storage cost, TDengine also provides tiered storage: the coldest data can be stored on the cheapest storage media, with no change required on the application side, at the cost of lower read speed.
+
+To improve speed, multiple hard disks can be configured so that data can be written and read concurrently. Note that TDengine provides high data reliability through multiple replicas, so expensive disk arrays are no longer necessary.
+
+### Number of physical or virtual machines
+
+From the above estimates of memory, CPU and storage, we know how many cores, how much memory and how much storage space the whole system needs. If the number of data replicas is not 1, the total demand must be multiplied by the number of replicas.
+
+Because TDengine scales out very well, it is easy to decide how many physical or virtual machines to purchase based on the total demand and the resources of a single physical/virtual machine.
+
+**To calculate CPU, memory and storage requirements immediately, see:** [**Resource Estimation**](https://www.taosdata.com/config/config.html)
+
+## Fault Tolerance and Disaster Recovery
+
+### Fault tolerance
+
+TDengine supports the WAL (Write Ahead Log) mechanism to achieve fault tolerance and ensure high availability of data.
+
+When TDengine receives a request packet from the application, it first writes the original request packet to the database log file, and deletes the corresponding WAL only after the data has been successfully written.
This ensures that TDengine can recover data from the log file when the service restarts after a power failure or other incident, thus avoiding data loss.
+
+Two system configuration parameters are involved:
+
+- walLevel: WAL level. 0: do not write WAL; 1: write WAL, but do not execute fsync; 2: write WAL and execute fsync.
+- fsync: the period at which fsync is executed when walLevel is set to 2. Setting it to 0 means that fsync is executed immediately on every write.
+
+To guarantee 100% data safety, set walLevel to 2 and fsync to 0. This reduces write speed; however, if the number of threads writing data on the application side reaches a certain level (more than 50), write performance is still good, only about 30% lower than with fsync set to 3000 milliseconds.
+
+### Disaster recovery
+
+A TDengine cluster provides high availability and implements disaster recovery through its multi-replica mechanism.
+
+A TDengine cluster is managed by mnodes. To ensure high reliability of the mnode, multiple mnode replicas can be configured; the number of replicas is determined by the system configuration parameter numOfMnodes. To support high reliability, it must be set to a value greater than 1. To guarantee strong consistency of metadata, the mnode replicas replicate data synchronously.
+
+The number of replicas of time-series data in a TDengine cluster is a property of each database. There can be multiple databases in a cluster, and each can be configured with a different number of replicas. When creating a database, specify the number of replicas with the parameter replica. To support high reliability, the number of replicas must be set greater than 1.
+
+The number of nodes in a TDengine cluster must be greater than or equal to the number of replicas, otherwise an error will be reported at table creation.
+
+When the nodes of a TDengine cluster are deployed on different physical machines and multiple replicas are configured, the system achieves high reliability without any other software or tools. TDengine Enterprise Edition can additionally deploy replicas in different server rooms, thus realizing remote disaster recovery.
+
+## Server-side Configuration
+
+The background service of the TDengine system is provided by taosd, and its configuration parameters can be modified in the configuration file taos.cfg to meet the requirements of different scenarios. The default location of the configuration file is the /etc/taos directory, and a different directory can be specified with the taosd command line parameter -c. For example, `taosd -c /home/user` specifies that the configuration file is located in the /home/user directory.
+
+You can also use -C to show the current server configuration parameters:
+
+```
+taosd -C
+```
+
+Only some important configuration parameters are listed below; for more parameters, please refer to the notes in the configuration file. Please refer to the previous chapters for a detailed introduction to each parameter; the defaults of these parameters work well and generally do not need to be changed. **Note: after the configuration is modified, the taosd service needs to be restarted for it to take effect.**
+
+- firstEp: the End Point of the first dnode that taosd actively connects to in the cluster at startup; the default value is localhost:6030.
+- fqdn: the FQDN of the data node, which defaults to the first hostname configured by the operating system. If you are used to IP-address access, you can set it to the IP address of the node.
+- serverPort: the port number of taosd's external service after startup; the default value is 6030.
+- httpPort: the port number used by the RESTful service, to which all HTTP query/write requests (TCP) are sent. The default value is 6041.
+- dataDir: the data file directory, to which all data files are written. Default: /var/lib/taos.
+- logDir: the log file directory, to which the running logs of the client and server are written. Default: /var/log/taos.
+- arbitrator: the End Point of the arbitrator in the system; the default value is empty.
+- role: the role of the dnode. 0: any, it can serve as an mnode and allocate vnodes; 1: mgmt, it can only serve as an mnode and cannot allocate vnodes; 2: dnode, it cannot serve as an mnode and can only allocate vnodes.
+- debugFlag: the logging level switch. 131 (output error and warning logs), 135 (output error, warning, and debug logs), 143 (output error, warning, debug, and trace logs). Default value: 131 or 135 (different modules have different default values).
+- numOfLogLines: the maximum number of lines allowed in a single log file. Default: 10,000,000 lines.
+- logKeepDays: the maximum retention time of log files. When it is greater than 0, a log file is renamed to taosdlog.xxx, where xxx is the timestamp of the log file's last modification in seconds. Default: 0 days.
+- maxSQLLength: the maximum length allowed for a single SQL statement. Default: 65380 bytes.
+- telemetryReporting: whether TDengine is allowed to collect and report basic usage information. 0 means not allowed, 1 means allowed. Default: 1.
+- stream: whether continuous query (the stream computing function) is enabled. 0 means disabled, 1 means enabled. Default: 1.
+- queryBufferSize: the amount of memory reserved for all concurrent queries. A rule of thumb is to multiply the maximum expected number of concurrent queries in the application by the number of tables involved, and then by 170. The unit is MB (in versions before 2.0.15, the unit of this parameter is bytes).
+- ratioOfQueryCores: sets the maximum number of query threads. The minimum value 0 means there is only one query thread; the maximum value 2 means the number of query threads created is at most twice the number of CPU cores. The default is 1, meaning the maximum number of query threads equals the number of CPU cores. The value can be a decimal; for example, 0.5 creates at most half as many query threads as CPU cores.
+
+**Note:** TDengine uses 13 consecutive TCP and UDP port numbers starting from serverPort, so be sure to open them in the firewall. With the default configuration, a total of 13 ports from 6030 to 6042 need to be opened, for both TCP and UDP.
+
+Data in different application scenarios often have different characteristics, such as retention days, number of replicas, collection frequency, record size, number of collection points, compression, etc. To obtain the best storage efficiency, TDengine provides the following storage-related system configuration parameters:
+
+- days: the time span of data stored in one data file, in days; the default value is 10.
+- keep: the number of days data is kept in the database; default value: 3650.
+- minRows: the minimum number of records in a file block; default: 100.
+- maxRows: the maximum number of records in a file block; default: 4096.
+- comp: file compression flag. 0: off; 1: one-stage compression; 2: two-stage compression. Default: 2.
+- walLevel: WAL level. 1: write WAL but do not execute fsync; 2: write WAL and execute fsync. Default: 1.
+- fsync: the period at which fsync is executed when walLevel is set to 2, in milliseconds. Setting it to 0 means that fsync is executed immediately on every write. Default: 3000.
+- cache: the size of a memory block, in megabytes (MB). Default: 16.
+- blocks: how many cache-sized memory blocks each VNODE (TSDB) has. The memory used by a VNODE is therefore roughly (cache * blocks). Default: 4.
+- replica: the number of replicas; value range: 1-3. Default: 1.
+- precision: timestamp precision, ms for milliseconds and us for microseconds. Default: ms.
+- cacheLast: whether the last_row of sub-tables is cached in memory. 0: off; 1: on. Default: 0. (This parameter is supported as of version 2.0.11.)
+
+In one application scenario, data with several different characteristics may coexist. The best design is to put tables with the same data characteristics in one database. Such an application may therefore have multiple databases, each configured with different storage parameters, ensuring optimal overall performance. TDengine allows the application to specify the above storage parameters at database creation; if specified, they override the corresponding system configuration parameters. For example, consider the following SQL:
+
+```
+create database demo days 10 cache 32 blocks 8 replica 3 update 1;
+```
+
+This SQL creates a database demo in which each data file stores 10 days of data, a memory block is 32 megabytes, each VNODE occupies 8 memory blocks, the number of replicas is 3, and updates are allowed; all other parameters stay consistent with the system configuration.
+
+When a new dnode is added to a TDengine cluster, some cluster-related parameters must be identical to the configuration of the existing cluster, otherwise the dnode cannot join. The parameters that are verified are as follows (an illustrative taos.cfg fragment is shown after this list):
+
+- numOfMnodes: the number of management nodes in the system. Default: 3. (Since versions 2.0.20.11 and 2.1.6.0, the default value of numOfMnodes has been changed to 1.)
+- balance: whether to enable load balancing. 0: no, 1: yes. Default: 1.
+- mnodeEqualVnodeNum: the number of vnodes an mnode is counted as when allocating resources. Default: 4.
+- offlineThreshold: the threshold for a dnode being offline, beyond which the dnode is removed from the cluster. The unit is seconds, and the default value is 86400*10 (i.e., 10 days).
+- statusInterval: the interval at which a dnode reports its status to the mnode. The unit is seconds, and the default value is 1.
+- maxTablesPerVnode: the maximum number of tables that can be created in each vnode. Default: 1000000.
+- maxVgroupsPerDb: the maximum number of vgroups that can be used by each database.
+- arbitrator: the End Point of the arbitrator in the system; empty by default.
+- See Client Configuration for the configuration of timezone, locale and charset.
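+
+As an illustration, a taos.cfg fragment covering the verified parameters might look as follows (a sketch only: the values shown are the documented defaults, and any parameter left out simply falls back to its default):
+
+```
+# these values must match on every dnode that joins the same cluster
+numOfMnodes        3
+balance            1
+mnodeEqualVnodeNum 4
+offlineThreshold   864000
+statusInterval     1
+maxTablesPerVnode  1000000
+```
+
+Keeping these settings in a shared configuration template helps avoid join failures when expanding the cluster.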
+
+For convenience of debugging, the log configuration of each dnode can be adjusted temporarily through SQL statements; the adjustments become invalid after a system restart:
+
+```mysql
+ALTER DNODE <dnode_id> <config>
+```
+
+- dnode_id: available from the output of the "SHOW DNODES" command
+- config: the log parameter to adjust, taken from the following list:
+
+  - resetlog: truncate the old log file and create a new one
+  - debugFlag < 131 | 135 | 143 >: set debugFlag to 131, 135 or 143
+
+For example:
+
+```
+alter dnode 1 debugFlag 135;
+```
+
+## Client Configuration
+
+The foreground interactive client application of the TDengine system is taos, and the application driver shares the same configuration file taos.cfg with taosd. When running taos, use the parameter -c to specify the configuration file directory, such as `taos -c /home/cfg`, which means using the parameters in the taos.cfg configuration file under the /home/cfg/ directory. The default directory is /etc/taos. For more information on how to use taos, see the help information `taos --help`. This section mainly describes the parameters used by the taos client application in the configuration file taos.cfg.
+
+**Versions after 2.0.10.0 support the following command line parameters to display the current client configuration:**
+
+```bash
+taos -C or taos --dump-config
+```
+
+Client configuration parameters:
+
+- firstEp: the End Point of the first taosd instance that taos actively connects to at startup; the default value is localhost:6030.
+- secondEp: if taos fails to connect to firstEp at startup, it tries to connect to secondEp.
+- locale
+
+  Default value: obtained dynamically from the system. If automatic acquisition fails, the user needs to set it in the configuration file or through the API.
+
+  TDengine provides a special field type, nchar, for storing non-ASCII encoded wide characters such as Chinese, Japanese and Korean. Data written to an nchar field is uniformly encoded in UCS4-LE format and sent to the server. It should be noted that the correctness of the encoding is guaranteed by the client. Therefore, if users want to use nchar fields to store non-ASCII characters such as Chinese, Japanese or Korean, the client's encoding format must be set correctly.
+
+  The characters input on the client all use the operating system's current default encoding, which is mostly UTF-8 on Linux systems; some Chinese system encodings may be GB18030 or GBK. The default encoding in a docker environment is POSIX. In Chinese versions of Windows, the encoding is CP936. The client must ensure that the character set it uses is set correctly, that is, to the current encoding character set of the operating system the client runs on, so that the data in nchar fields is correctly converted into the UCS4-LE encoding format.
+
+  The naming rule of locale in Linux is: <language>_<region>.<character set encoding>, for example zh_CN.UTF-8, where zh stands for Chinese, CN for the mainland region, and UTF-8 for the character set. The character set encoding tells the client how to correctly parse local strings. Linux and Mac OSX systems determine the system's character encoding by setting the locale. Because the locale used by Windows is not in the POSIX standard locale format, another configuration parameter, charset, is needed to specify the character encoding under Windows.
You can also use charset to specify the character encoding on Linux systems.
+
+- charset
+
+  Default value: obtained dynamically from the system. If automatic acquisition fails, the user needs to set it in the configuration file or through the API.
+
+  If charset is not set in the configuration file, then on Linux systems taos automatically reads the system's current locale information at startup, and parses and extracts the charset encoding format from it. If reading the locale information fails, it attempts to read the charset configuration; if reading the charset configuration also fails, the startup process is interrupted.
+
+  On Linux systems, the locale information contains the character encoding information, so after correctly setting the Linux system locale it is unnecessary to set charset separately. For example:
+
+  ```
+  locale zh_CN.UTF-8
+  ```
+  On Windows systems, the current system encoding cannot be obtained from the locale. If the string encoding information cannot be read from the configuration file, taos defaults to CP936, which is equivalent to adding the following to the configuration file:
+  ```
+  charset CP936
+  ```
+  If you need to adjust the character encoding, check the encoding used by the current operating system and set it correctly in the configuration file.
+
+  On Linux systems, if the user sets both locale and charset and the two are inconsistent, the value set later overrides the value set earlier.
+  ```
+  locale zh_CN.UTF-8
+  charset GBK
+  ```
+  Here the valid value for charset is GBK. Conversely, if charset GBK is set first and locale zh_CN.UTF-8 afterwards, the valid value for charset is UTF-8.
+
+  The log configuration parameters are exactly the same as those of the server.
+
+- timezone
+
+  Default value: the current time zone option, obtained dynamically from the system.
+
+  This is the time zone of the system on which the client runs. To handle data writing and querying across multiple time zones, TDengine uses Unix timestamps to record and store time. The nature of Unix timestamps guarantees that a generated timestamp is the same at any moment regardless of time zone. Note that the conversion to Unix timestamps is done on the client side. To ensure that other time formats on the client are converted into correct Unix timestamps, the correct time zone must be set.
+
+  On Linux systems, the client automatically reads the time zone set by the system. The user can also set the time zone in the configuration file, in several formats. For example:
+  ```
+  timezone UTC-8
+  timezone GMT-8
+  timezone Asia/Shanghai
+  ```
+
+  All of the above are valid ways to set the time zone to UTC+8.
+
+  The time zone setting affects the interpretation of non-Unix-timestamp content (timestamp strings and the keyword now) in query and write SQL statements. For example:
+
+  ```sql
+  SELECT count(*) FROM table_name WHERE TS<'2019-04-11 12:01:08';
+  ```
+
+  In the UTC+8 time zone, the SQL statement is equivalent to
+  ```sql
+  SELECT count(*) FROM table_name WHERE TS<1554955268000;
+  ```
+
+  In the UTC time zone, the SQL statement is equivalent to
+  ```sql
+  SELECT count(*) FROM table_name WHERE TS<1554984068000;
+  ```
+  To avoid the uncertainty of string time formats, Unix timestamps can also be used directly.
In addition, timestamp strings with explicit time zones can also be used in SQL statements, such as RFC 3339 timestamp strings like 2013-04-12T15:52:01.123+08:00, or ISO 8601 timestamp strings like 2013-04-12T15:52:01.123+0800. The conversion of these two kinds of strings into Unix timestamps is not affected by the system's time zone.
+
+  When starting taos, you can also specify the End Point of a taosd instance on the command line; otherwise it is read from taos.cfg.
+
+- maxBinaryDisplayWidth
+
+  The upper limit of the display width of binary and nchar fields in the shell; content beyond it is hidden. Default: 30. You can modify this option dynamically in the shell with the command `SET MAX_BINARY_DISPLAY_WIDTH <nn>`.
+
+## User Management
+
+The system administrator can add and delete users in the CLI, and also modify passwords. The SQL syntax in the CLI is as follows:
+
+```sql
+CREATE USER <user_name> PASS <'password'>;
+```
+
+Create a user, specifying the user name and password. The password needs to be enclosed in single quotation marks; the single quotation marks are ASCII characters.
+
+```sql
+DROP USER <user_name>;
+```
+
+Delete a user; root only.
+
+```sql
+ALTER USER <user_name> PASS <'password'>;
+```
+
+Modify a user's password. To prevent it from being converted to lowercase, the password needs to be enclosed in single quotation marks; the single quotation marks are ASCII characters.
+
+```sql
+ALTER USER <user_name> PRIVILEGE <write|read>;
+```
+
+Modify the user privilege to write or read, without adding single quotation marks.
+
+Note: There are three privilege levels in the system: super/write/read, but it is currently not possible to grant the super privilege to a user through the ALTER instruction.
+
+```mysql
+SHOW USERS;
+```
+
+Show all users.
+
+**Note:** In the SQL syntax, < > indicates a part that the user needs to input, but do not enter < > itself.
+
+## Import Data
+
+TDengine provides a variety of convenient data import functions: import by script file, import by data file, and import via the taosdump tool.
+
+**Import by script file**
+
+The TDengine shell supports the `source <filename>` command, which runs the SQL statements in a file in batch. Users can write SQL commands such as database creation, table creation and data writing in the same file, one command per line. By running the source command in the shell, the SQL statements in the file are executed in batch, in order. SQL statements beginning with '#' are treated as comments and automatically ignored by the shell.
+
+**Import by data file**
+
+TDengine also supports importing data from CSV files into existing tables in the shell. The CSV file belongs to one table only, and the data format in the CSV file must match the structure of the table to be imported.
When importing, the syntax is as follows:
+
+```mysql
+insert into tb1 file 'path/data.csv';
+```
+
+Note: if there is descriptive information in the first line of the CSV file, please delete it manually before importing.
+
+For example, suppose there is a sub-table d1001 whose structure is as follows:
+
+```mysql
+taos> DESCRIBE d1001
+ Field | Type | Length | Note |
+=================================================================================
+ ts | TIMESTAMP | 8 | |
+ current | FLOAT | 4 | |
+ voltage | INT | 4 | |
+ phase | FLOAT | 4 | |
+ location | BINARY | 64 | TAG |
+ groupid | INT | 4 | TAG |
+```
+
+And the data.csv to import has the following format:
+
+```csv
+'2018-10-04 06:38:05.000',10.30000,219,0.31000
+'2018-10-05 06:38:15.000',12.60000,218,0.33000
+'2018-10-06 06:38:16.800',13.30000,221,0.32000
+'2018-10-07 06:38:05.000',13.30000,219,0.33000
+'2018-10-08 06:38:05.000',14.30000,219,0.34000
+'2018-10-09 06:38:05.000',15.30000,219,0.35000
+'2018-10-10 06:38:05.000',16.30000,219,0.31000
+'2018-10-11 06:38:05.000',17.30000,219,0.32000
+'2018-10-12 06:38:05.000',18.30000,219,0.31000
+```
+
+Then we can import it with the following command:
+
+```mysql
+taos> insert into d1001 file '~/data.csv';
+Query OK, 9 row(s) affected (0.004763s)
+```
+
+**Import via the taosdump tool**
+
+TDengine provides a convenient database import and export tool, taosdump. Users can import data exported by taosdump from one system into another. Please refer to the blog post: [User Guide of TDengine DUMP Tool](https://www.taosdata.com/blog/2020/03/09/1334.html).
+
+## Export Data
+
+To facilitate data export, TDengine provides two export methods: export by table, and export via taosdump.
+
+**Export a CSV file by table**
+
+If a user needs to export the data of a table or a STable, the following can be run in the shell:
+
+```mysql
+select * from <tb_name> >> data.csv;
+```
+
+In this way, the data in table tb_name will be exported to the file data.csv in CSV format.
+
+**Export data via taosdump**
+
+TDengine provides a convenient database export tool, taosdump. Users can choose to export all databases, one database, or one table in a database; all data or the data of a time period; or even just the definition of a table, as needed. Please refer to the blog post: [User Guide of TDengine DUMP Tool](https://www.taosdata.com/blog/2020/03/09/1334.html)
+
+## System Connection and Task Query Management
+
+The system administrator can query the connections, ongoing queries and stream computations of the system from the CLI, and can close connections and stop ongoing queries and stream computations. The SQL syntax in the CLI is as follows:
+
+```mysql
+SHOW CONNECTIONS;
+```
+
+Show the connections to the database; one column shows ip:port, the IP address and port number of the connection.
+
+```mysql
+KILL CONNECTION <connection-id>;
+```
+
+Force a database connection to close, where connection-id is the number in the first column displayed by SHOW CONNECTIONS.
+
+```mysql
+SHOW QUERIES;
+```
+
+Show the ongoing data queries. In the first column, the two numbers separated by a colon form the query-id: the connection-id of the application connection that initiated the query, and the query's sequence number on that connection.
+
+```mysql
+KILL QUERY <query-id>;
+```
+
+Force a data query to close, where query-id is the connection-id:query-no string displayed by SHOW QUERIES, such as "105:2"; copy and paste it.
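+
+For example, assuming SHOW QUERIES listed a query whose first column reads 105:2 (connection-id 105, query number 2; both values are hypothetical), it could be stopped with:
+
+```mysql
+KILL QUERY "105:2";
+```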
+
+```mysql
+SHOW STREAMS;
+```
+
+Show the ongoing stream computations. In the first column, the two numbers separated by a colon form the stream-id: the connection-id of the application connection that started the stream, and the sequence number of the stream on that connection.
+
+```mysql
+KILL STREAM <stream-id>;
+```
+
+Force a stream computation to stop, where stream-id is the connection-id:stream-no string displayed by SHOW STREAMS, such as 103:2; copy and paste it.
+
+## System Monitoring
+
+After TDengine starts, it automatically creates a monitoring database, log, and periodically writes the server's CPU, memory, disk space, bandwidth, request count, disk I/O speed, slow queries and other information into it. TDengine also records important system operations (such as logging in and creating or deleting databases) as well as error and alarm information in the log database. The system administrator can view the database directly from the CLI, or view the monitoring information through a GUI on the web.
+
+The collection of these monitoring metrics is enabled by default, but it can be turned off or on by modifying the option enableMonitor in the configuration file.
+
+## File Directory Structure
+
+After installing TDengine, the following directories or files are generated in the operating system by default:
+
+| **Directory/File** | **Description** |
+| ------------------------- | ------------------------------------------------------------ |
+| /usr/local/taos/bin | TDengine's executable directory. The executables are linked into the /usr/bin directory via soft links. |
+| /usr/local/taos/connector | TDengine's various connector directories. |
+| /usr/local/taos/driver | TDengine's dynamic link library directory, linked into the /usr/lib directory via soft links. |
+| /usr/local/taos/examples | TDengine's application examples for various languages. |
+| /usr/local/taos/include | TDengine's C-interface header files for external use. |
+| /etc/taos/taos.cfg | TDengine's default configuration file. |
+| /var/lib/taos | TDengine's default data file directory; the location can be changed via the configuration file. |
+| /var/log/taos | TDengine's default log file directory; the location can be changed via the configuration file. |
+
+**Executables**
+
+All executables of TDengine are stored in the directory /usr/local/taos/bin by default, including:
+
+- *taosd*: the TDengine server-side executable
+- *taos*: the TDengine shell executable
+- *taosdump*: a data import/export tool
+- remove.sh: the script to uninstall TDengine. Execute it with care; it is linked to the rmtaos command in the /usr/bin directory. It removes the TDengine installation directory /usr/local/taos, but /etc/taos, /var/lib/taos and /var/log/taos remain.
+
+You can configure different data and log directories by modifying the system configuration file taos.cfg.
+
+## TDengine Parameter Limits and Reserved Keywords
+
+- Database name: cannot contain "." and other special characters, and cannot exceed 32 characters
+- Table name: cannot contain "."
and other special characters, and cannot exceed 192 characters together with the database name it belongs to
+- Table column name: cannot contain special characters, and cannot exceed 64 characters
+- Database name, table name and column name cannot begin with a digit
+- Number of columns in a table: cannot exceed 1024
+- Maximum length of a record: including the 8-byte timestamp, no more than 16KB (each BINARY/NCHAR column occupies an additional 2 bytes of storage)
+- Default maximum string length for a single SQL statement: 65480 bytes
+- Number of database replicas: no more than 3
+- User name: no more than 23 bytes
+- User password: no more than 15 bytes
+- Number of tags: no more than 128
+- Total length of tags: cannot exceed 16KB
+- Number of records: limited only by storage space
+- Number of tables: limited only by the number of nodes
+- Number of databases: limited only by the number of nodes
+- Number of virtual nodes for a single database: cannot exceed 64
+
+At the moment, TDengine has nearly 200 internal reserved keywords, which cannot be used as a database name, table name, STable name, data column name or tag column name, regardless of case. The list of these keywords is as follows:
+
+| **List of Keywords** | | | | |
+| -------------------- | ----------- | ------------ | ---------- | --------- |
+| ABLOCKS | CONNECTIONS | GT | MNODES | SLIDING |
+| ABORT | COPY | ID | MODULES | SLIMIT |
+| ACCOUNT | COUNT | IF | NCHAR | SMALLINT |
+| ACCOUNTS | CREATE | IGNORE | NE | SPREAD |
+| ADD | CTIME | IMMEDIATE | NONE | STABLE |
+| AFTER | DATABASE | IMPORT | NOT | STABLES |
+| ALL | DATABASES | IN | NOTNULL | STAR |
+| ALTER | DAYS | INITIALLY | NOW | STATEMENT |
+| AND | DEFERRED | INSERT | OF | STDDEV |
+| AS | DELIMITERS | INSTEAD | OFFSET | STREAM |
+| ASC | DESC | INTEGER | OR | STREAMS |
+| ATTACH | DESCRIBE | INTERVAL | ORDER | STRING |
+| AVG | DETACH | INTO | PASS | SUM |
+| BEFORE | DIFF | IP | PERCENTILE | TABLE |
+| BEGIN | DISTINCT | IS | PLUS | TABLES |
+| BETWEEN | DIVIDE | ISNULL | PRAGMA | TAG |
+| BIGINT | DNODE | JOIN | PREV | TAGS |
+| BINARY | DNODES | KEEP | PRIVILEGE | TBLOCKS |
+| BITAND | DOT | KEY | QUERIES | TBNAME |
+| BITNOT | DOUBLE | KILL | QUERY | TIMES |
+| BITOR | DROP | LAST | RAISE | TIMESTAMP |
+| BOOL | EACH | LE | REM | TINYINT |
+| BOTTOM | END | LEASTSQUARES | REPLACE | TOP |
+| BY | EQ | LIKE | REPLICA | TRIGGER |
+| CACHE | EXISTS | LIMIT | RESET | UMINUS |
+| CASCADE | EXPLAIN | LINEAR | RESTRICT | UPLUS |
+| CHANGE | FAIL | LOCAL | ROW | USE |
+| CLOG | FILL | LP | ROWS | USER |
+| CLUSTER | FIRST | LSHIFT | RP | USERS |
+| COLON | FLOAT | LT | RSHIFT | USING |
+| COLUMN | FOR | MATCH | SCORES | VALUES |
+| COMMA | FROM | MAX | SELECT | VARIABLE |
+| COMP | GE | METRIC | SEMI | VGROUPS |
+| CONCAT | GLOB | METRICS | SET | VIEW |
+| CONFIGS | GRANTS | MIN | SHOW | WAVG |
+| CONFLICT | GROUP | MINUS | SLASH | WHERE |
+| CONNECTION | | | | |
diff --git a/documentation20/en/12.taos-sql/docs.md b/documentation20/en/12.taos-sql/docs.md
new file mode 100644
index 0000000000000000000000000000000000000000..2f344b45294eb629f2fd5c12be88707b3d3930df
--- /dev/null
+++ b/documentation20/en/12.taos-sql/docs.md
@@ -0,0 +1,1246 @@
+# TAOS SQL
+
+TDengine provides a SQL-style language, TAOS SQL, for inserting and querying data, along with other common operations. To read through this document, some basic understanding of SQL is required.
+
+TAOS SQL is the main tool for users to write data to and query data from TDengine.
TAOS SQL provides a style and model similar to standard SQL so that users can get started quickly. Strictly speaking, TAOS SQL is not, and does not attempt to be, standard SQL. In addition, since TDengine does not provide a deletion function for time-series data, there is no data deletion functionality in TAOS SQL.
+
+Let's take a look at the conventions used in the syntax descriptions:
+
+- The content in < > is what the user needs to enter, but do not enter < > itself
+- [] indicates optional content, but do not enter [] itself
+- "|" means choose one of several options, but do not enter | itself
+- "…" means the preceding element may be repeated
+
+To better explain the rules and characteristics of the SQL syntax, this document assumes a data set. Take smart meters as an example: each smart meter collects three metrics, current, voltage and phase. It is modeled as follows:
+
+```mysql
+taos> DESCRIBE meters;
+ Field | Type | Length | Note |
+=================================================================================
+ ts | TIMESTAMP | 8 | |
+ current | FLOAT | 4 | |
+ voltage | INT | 4 | |
+ phase | FLOAT | 4 | |
+ location | BINARY | 64 | TAG |
+ groupid | INT | 4 | TAG |
+```
+
+The data set contains data from four smart meters, which correspond to four sub-tables according to TDengine's modeling rules, named D1001, D1002, D1003 and D1004 respectively.
+
+## Data Types
+
+In TDengine, the most important thing is the timestamp. A timestamp needs to be specified when creating and inserting records and when querying historical records. Timestamps follow these rules:
+
+- Time format: 'YYYY-MM-DD HH:mm:ss.MS', with millisecond precision by default, e.g. '2017-08-12 18:52:58.128'
+- Internal function **now**: the current time of the server
+- When inserting a record, if the timestamp is NOW, the server's current time is used
+- Epoch time: a timestamp value can also be a long integer representing milliseconds since the Unix epoch, 1970-01-01 00:00:00.000 (UTC)
+- Arithmetic operations can be applied to timestamps. For example, now-2h represents a timestamp 2 hours before the current server time. Units include u (microseconds), a (milliseconds), s (seconds), m (minutes), h (hours), d (days), w (weeks). For example, `select * from t1 where ts > now-2w and ts <= now-1w` queries the data of the whole week two weeks ago. To specify the interval for down-sampling, n (calendar months) and y (calendar years) can also be used as time units.
+
+The default time precision of TDengine is milliseconds; you can change it to microseconds by setting the parameter enableMicrosecond.
+
+In TDengine, the following 10 data types can be used in the data model of an ordinary table.
+
+| | **Data Type** | **Bytes** | **Note** |
+| ---- | ------------- | --------- | ------------------------------------------------------------ |
+| 1 | TIMESTAMP | 8 | Timestamp. Millisecond precision by default, with microseconds supported. Starting from 1970-01-01 00:00:00.000 (UTC/GMT); times cannot be earlier than this. |
+| 2 | INT | 4 | A nullable integer type with a range of [-2^31+1, 2^31-1] |
+| 3 | BIGINT | 8 | A nullable integer type with a range of [-2^63+1, 2^63-1] |
+| 4 | FLOAT | 4 | A standard nullable float type with 6-7 significant digits and a range of [-3.4E38, 3.4E38] |
+| 5 | DOUBLE | 8 | A standard nullable double type with 15-16 significant digits and a range of [-1.7E308, 1.7E308] |
+| 6 | BINARY | Custom | Used to record ASCII strings.
Theoretically, the maximum length can be 16,374 bytes, but since each row of data can be at most 16KB, the actual limit is generally smaller than the theoretical value. BINARY only supports string input, with single quotation marks at both ends of the string; otherwise all English letters are automatically converted to lowercase. The size must be specified when used; for example, binary(20) defines a string with a maximum length of 20 characters, each occupying 1 byte of storage, and an error is reported if the user string exceeds 20 bytes. A single quotation mark inside a string can be represented by the escape character backslash plus a single quotation mark, i.e. \'. |
+| 7 | SMALLINT | 2 | A nullable integer type with a range of [-32767, 32767] |
+| 8 | TINYINT | 1 | A nullable integer type with a range of [-127, 127] |
+| 9 | BOOL | 1 | Boolean type, {true, false} |
+| 10 | NCHAR | Custom | Used to record non-ASCII strings, such as Chinese characters. Each nchar character takes up 4 bytes of storage space. Single quotation marks are used at both ends of the string, and single quotation marks inside the string require the escape character \'. When nchar is used, the string size must be specified; a column of type nchar(10) stores up to 10 nchar characters, taking up 40 bytes of space. If the length of a user string exceeds the declared length, an error is reported. |
+
+**Tips**:
+
+1. TDengine is case-insensitive to English characters in SQL statements and automatically converts them to lowercase for execution. Therefore, case-sensitive strings and passwords need to be enclosed in single quotation marks.
+2. Avoid using the BINARY type to save non-ASCII strings, which easily leads to problems such as garbled data. The correct way is to use the NCHAR type to save Chinese characters.
+
+## Database Management
+
+- **Create a Database**
+
+  ```mysql
+  CREATE DATABASE [IF NOT EXISTS] db_name [KEEP keep] [DAYS days] [UPDATE 1];
+  ```
+
+Note:
+
+1. KEEP is how long the data of the database is kept; the default is 3650 days (10 years), and expired data is deleted automatically;
+2. UPDATE marks that the database supports updating data with the same timestamp;
+3. The maximum length of the database name is 33;
+4. The maximum length of a SQL statement is 65480 characters;
+5. The database has more storage-related configuration parameters; see System Management.
+
+- **Show current system parameters**
+
+  ```mysql
+  SHOW VARIABLES;
+  ```
+
+- **Use a database**
+
+  ```mysql
+  USE db_name;
+  ```
+  Use/switch the database.
+
+- **Drop a database**
+  ```mysql
+  DROP DATABASE [IF EXISTS] db_name;
+  ```
+  Delete a database; all the data tables it contains will be deleted. Please use with caution.
+
+- **Modify database parameters**
+
+  ```mysql
+  ALTER DATABASE db_name COMP 2;
+  ```
+  The COMP parameter modifies the database file compression flag; the default value is 2 and the value range is [0, 2]. 0 means no compression, 1 means one-stage compression, and 2 means two-stage compression.
+
+  ```mysql
+  ALTER DATABASE db_name REPLICA 2;
+  ```
+  The REPLICA parameter modifies the number of replicas of the database; the value range is [1, 3]. In a cluster, the number of replicas must be less than or equal to the number of dnodes.
+
+  ```mysql
+  ALTER DATABASE db_name KEEP 365;
+  ```
+  The KEEP parameter modifies the number of days data files are kept.
The default value is 3650, and the value range is [days, 365000]; it must be greater than or equal to the days parameter value.
+
+  ```mysql
+  ALTER DATABASE db_name QUORUM 2;
+  ```
+  The QUORUM parameter is the number of confirmations required for a successful data write; the value range is [1, 3]. For asynchronous replication, quorum is set to 1, and the virtual node with the master role confirms by itself. For synchronous replication, it needs to be at least 2. In principle, Quorum >= 1 and Quorum <= replica (the number of replicas); this parameter needs to be provided when starting a synchronization module instance.
+
+  ```mysql
+  ALTER DATABASE db_name BLOCKS 100;
+  ```
+  The BLOCKS parameter is the number of cache-sized memory blocks in each VNODE (TSDB), so the memory used by a VNODE is roughly (cache * blocks). The value range is [3, 1000].
+
+  ```mysql
+  ALTER DATABASE db_name CACHELAST 0;
+  ```
+  The CACHELAST parameter controls whether the last_row of sub-tables is cached in memory. The default value is 0, and the value range is [0, 1], where 0 means disabled and 1 means enabled. (Supported from version 2.0.11.)
+
+  **Tips**: After modifying any of the above parameters, use SHOW DATABASES to confirm whether the modification succeeded.
+
+- **Show all databases in system**
+
+  ```mysql
+  SHOW DATABASES;
+  ```
+
+## Table Management
+
+- **Create a table**
+
+Note:
+
+1. The first field must be a timestamp, and the system will set it as the primary key;
+2. The maximum length of the table name is 192;
+3. The length of each row of the table cannot exceed 16KB;
+4. Sub-table names can only consist of letters, digits and underscores, and cannot begin with a digit;
+5. If the data type BINARY or NCHAR is used, the maximum number of bytes must be specified, such as binary(20), which means 20 bytes;
+
+- **Create a table via STable**
+
+  ```mysql
+  CREATE TABLE [IF NOT EXISTS] tb_name USING stb_name TAGS (tag_value1, ...);
+  ```
+  Use a STable as a template and assign tag values to create a data table.
+
+- **Create a data table using STable as a template and specify specific tag columns**
+
+  ```mysql
+  CREATE TABLE [IF NOT EXISTS] tb_name USING stb_name (tag_name1, ...) TAGS (tag_value1, ...);
+  ```
+  Using the specified STable as a template, specify the values of some tag columns to create a data table. (Unspecified tag columns are set to null.)
+  Note: This method is supported since version 2.0.17. In earlier versions, tag columns could not be selectively specified; the values of all tag columns had to be given explicitly.
+
+- **Create tables in batches**
+
+  ```mysql
+  CREATE TABLE [IF NOT EXISTS] tb_name1 USING stb_name TAGS (tag_value1, ...) tb_name2 USING stb_name TAGS (tag_value2, ...) ...;
+  ```
+  Create a large number of data tables in batches, which is faster. (Server side 2.0.14 and above.)
+
+  Note:
+  1. Batch table creation requires that the data tables use a STable as their template.
+  2. Provided the length limit of SQL statements is not exceeded, it is recommended to keep the number of tables in a single statement between 1000 and 3000 to obtain an ideal table-creation speed.
+
+- **Drop a table**
+
+  ```mysql
+  DROP TABLE [IF EXISTS] tb_name;
+  ```
+
+- **Show all data table information under the current database**
+
+  ```mysql
+  SHOW TABLES [LIKE tb_name_wildcard];
+  ```
+  Show all data table information under the current database.
+  Note: Wildcard characters can be used to match names in LIKE.
The maximum length of the wildcard string cannot exceed 24 bytes.
+  Wildcard matching: 1) '%' (percent sign) matches 0 to any number of characters; 2) '_' (underscore) matches exactly one character.
+
+- **Modify display character width online**
+
+  ```mysql
+  SET MAX_BINARY_DISPLAY_WIDTH <nn>;
+  ```
+
+- **Get schema information of a table**
+
+  ```mysql
+  DESCRIBE tb_name;
+  ```
+
+- **Add a column to table**
+
+  ```mysql
+  ALTER TABLE tb_name ADD COLUMN field_name data_type;
+  ```
+  Note:
+  1. The maximum number of columns is 1024 and the minimum number is 2;
+  2. The maximum length of a column name is 64;
+
+- **Drop a column in table**
+
+  ```mysql
+  ALTER TABLE tb_name DROP COLUMN field_name;
+  ```
+  If the table was created through a STable, schema changes can only be carried out on the STable, and the changes then take effect for all tables created through it. For tables not created through a STable, the table schema can be modified directly.
+
+## STable Management
+
+Note: Versions 2.0.15.0 and later support the STABLE reserved word. In the instruction descriptions below, older versions write TABLE instead of STABLE for the CREATE, DROP and ALTER instructions.
+
+- **Create a STable**
+
+  ```mysql
+  CREATE STABLE [IF NOT EXISTS] stb_name (timestamp_field_name TIMESTAMP, field1_name data_type1 [, field2_name data_type2 ...]) TAGS (tag1_name tag_type1, tag2_name tag_type2 [, tag3_name tag_type3]);
+  ```
+  Similar to standard table-creation SQL, but the names and types of the TAGS fields also need to be specified.
+
+  Note:
+
+  1. The data type of a TAGS column cannot be timestamp;
+  2. TAGS column names cannot be duplicated;
+  3. Reserved words cannot be used as TAGS column names;
+  4. The maximum number of TAGS is 128, with at least 1 TAG allowed, and a total length of no more than 16KB.
+
+- **Drop a STable**
+
+  ```mysql
+  DROP STABLE [IF EXISTS] stb_name;
+  ```
+  Dropping a STable automatically deletes all sub-tables created through it.
+
+- **Show all STable information under the current database**
+
+  ```mysql
+  SHOW STABLES [LIKE tb_name_wildcard];
+  ```
+  View all STables under the current database and related information, including name, creation time, number of columns, number of tags, number of tables created through the STable, etc.
+
+- **Obtain schema information of a STable**
+
+  ```mysql
+  DESCRIBE stb_name;
+  ```
+
+- **Add column to STable**
+
+  ```mysql
+  ALTER STABLE stb_name ADD COLUMN field_name data_type;
+  ```
+
+- **Drop column in STable**
+
+  ```mysql
+  ALTER STABLE stb_name DROP COLUMN field_name;
+  ```
+
+## TAG Management in STable
+
+- **Add a tag**
+
+  ```mysql
+  ALTER STABLE stb_name ADD TAG new_tag_name tag_type;
+  ```
+  Add a new tag to the STable and specify its type. The total number of tags cannot exceed 128, and their total length cannot exceed 16KB.
+
+- **Drop a tag**
+
+  ```mysql
+  ALTER STABLE stb_name DROP TAG tag_name;
+  ```
+  Delete a tag of a STable. After the tag is deleted, it is automatically deleted from all sub-tables under the STable as well.
+
+- **Modify a tag name**
+
+  ```mysql
+  ALTER STABLE stb_name CHANGE TAG old_tag_name new_tag_name;
+  ```
+  Modify a tag name of a STable. After the modification, all sub-tables under the STable automatically use the new tag name.
+
+- **Modify a tag value of sub-table**
+
+  ```mysql
+  ALTER TABLE tb_name SET TAG tag_name=new_tag_value;
+  ```
+  Note: Except for updating a tag's value, which is carried out on sub-tables, all other tag operations (adding tags, deleting tags, etc.) can only be applied to the STable and cannot be performed on a single sub-table. After a tag is added to a STable, all tables based on it automatically get the new tag, with a default value of NULL.
+
+## Data Writing
+
+- **Insert a record**
+
+  ```mysql
+  INSERT INTO tb_name VALUES (field_value, ...);
+  ```
+  Insert a record into table tb_name.
+
+- **Insert a record with data corresponding to given columns**
+
+  ```mysql
+  INSERT INTO tb_name (field1_name, ...) VALUES (field1_value1, ...);
+  ```
+  Insert a record into table tb_name, with the data corresponding to the given columns. Columns that do not appear in the SQL statement are automatically populated with NULL. The primary key (timestamp) cannot be NULL.
+
+- **Insert multiple records**
+
+  ```mysql
+  INSERT INTO tb_name VALUES (field1_value1, ...) (field1_value2, ...) ...;
+  ```
+  Insert multiple records into table tb_name.
+
+- **Insert multiple records into given columns**
+
+  ```mysql
+  INSERT INTO tb_name (field1_name, ...) VALUES (field1_value1, ...) (field1_value2, ...) ...;
+  ```
+  Insert multiple records into the given columns of table tb_name.
+
+- **Insert multiple records into multiple tables**
+
+  ```mysql
+  INSERT INTO tb1_name VALUES (field1_value1, ...) (field1_value2, ...) ...
+              tb2_name VALUES (field1_value1, ...) (field1_value2, ...) ...;
+  ```
+  Insert multiple records into tables tb1_name and tb2_name at the same time.
+
+- **Insert multiple records per column into multiple tables**
+
+  ```mysql
+  INSERT INTO tb1_name (tb1_field1_name, ...) VALUES (field1_value1, ...) (field1_value2, ...) ...
+              tb2_name (tb2_field1_name, ...) VALUES (field1_value1, ...) (field1_value2, ...) ...;
+  ```
+  Insert multiple records into given columns of tables tb1_name and tb2_name at the same time.
+  Note: The oldest record timestamp allowed for insertion is the current server time minus the configured keep value (the number of days data is retained), and the newest record timestamp allowed for insertion is the current server time plus the configured days value (the time span of data stored in one data file, in days). Both keep and days can be specified when the database is created; their defaults are 3650 days and 10 days, respectively.
+
+- **Automatically create a table when inserting**
+
+  ```mysql
+  INSERT INTO tb_name USING stb_name TAGS (tag_value1, ...) VALUES (field_value1, ...);
+  ```
+  If the user is not sure whether a table exists when writing data, the automatic table-creation syntax can be used to create the table on write if it does not exist; if the table already exists, no new table is created. Automatic table creation requires a STable as template and the tag values of the data table to be specified.
+
+- **Automatically create a table when inserting, specifying given tag columns**
+
+  ```mysql
+  INSERT INTO tb_name USING stb_name (tag_name1, ...) TAGS (tag_value1, ...) VALUES (field_value1, ...);
+  ```
+  During automatic table creation, the values of only some tag columns can be specified; the unspecified tag columns will be null.
+
+**History writing**: The IMPORT or INSERT command can be used; the syntax and function of IMPORT are exactly the same as those of INSERT.
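+
+Since IMPORT shares INSERT's syntax, a batch of historical records for the sample sub-table d1001 could, for example, be replayed as follows (a sketch; the timestamps and measurements are illustrative):
+
+```mysql
+IMPORT INTO d1001 VALUES
+  ('2018-09-01 06:00:00.000', 10.20000, 220, 0.23000)
+  ('2018-09-01 06:15:00.000', 10.30000, 221, 0.31000);
+```
+
+As with INSERT, the timestamps must still fall inside the window defined by the keep and days parameters described in the note above.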
+
+Note: For INSERT statements, the stream parsing strategy we adopt still executes the part of the SQL that is correct before an error is found. In the SQL below, the INSERT statement is invalid, but the table d1001 is still created.
+
+```mysql
+taos> CREATE TABLE meters(ts TIMESTAMP, current FLOAT, voltage INT, phase FLOAT) TAGS(location BINARY(30), groupId INT);
+Query OK, 0 row(s) affected (0.008245s)
+
+taos> SHOW STABLES;
+ name | created_time | columns | tags | tables |
+============================================================================================
+ meters | 2020-08-06 17:50:27.831 | 4 | 2 | 0 |
+Query OK, 1 row(s) in set (0.001029s)
+
+taos> SHOW TABLES;
+Query OK, 0 row(s) in set (0.000946s)
+
+taos> INSERT INTO d1001 USING meters TAGS('Beijing.Chaoyang', 2) VALUES('a');
+
+DB error: invalid SQL: 'a' (invalid timestamp) (0.039494s)
+
+taos> SHOW TABLES;
+ table_name | created_time | columns | stable_name |
+======================================================================================================
+ d1001 | 2020-08-06 17:52:02.097 | 4 | meters |
+Query OK, 1 row(s) in set (0.001091s)
+```
+
+## Data Query
+
+### Query Syntax
+
+```mysql
+SELECT select_expr [, select_expr ...]
+    FROM {tb_name_list}
+    [WHERE where_condition]
+    [INTERVAL (interval_val [, interval_offset])]
+    [SLIDING sliding_val]
+    [FILL fill_val]
+    [GROUP BY col_list]
+    [ORDER BY col_list { DESC | ASC }]
+    [SLIMIT limit_val [SOFFSET offset_val]]
+    [LIMIT limit_val [OFFSET offset_val]]
+    [>> export_file];
+```
+
+#### SELECT Clause
+
+A SELECT clause can be part of a UNION query or a subquery of another query.
+
+#### Wildcard character
+
+The wildcard \* can be used to refer to all columns. For ordinary tables, the result contains only ordinary columns.
+
+```mysql
+taos> SELECT * FROM d1001;
+ ts | current | voltage | phase |
+======================================================================================
+ 2018-10-03 14:38:05.000 | 10.30000 | 219 | 0.31000 |
+ 2018-10-03 14:38:15.000 | 12.60000 | 218 | 0.33000 |
+ 2018-10-03 14:38:16.800 | 12.30000 | 221 | 0.31000 |
+Query OK, 3 row(s) in set (0.001165s)
+```
+
+For STables, the wildcard also covers the *tag columns*.
+
+```mysql
+taos> SELECT * FROM meters;
+ ts | current | voltage | phase | location | groupid |
+=====================================================================================================================================
+ 2018-10-03 14:38:05.500 | 11.80000 | 221 | 0.28000 | Beijing.Haidian | 2 |
+ 2018-10-03 14:38:16.600 | 13.40000 | 223 | 0.29000 | Beijing.Haidian | 2 |
+ 2018-10-03 14:38:05.000 | 10.80000 | 223 | 0.29000 | Beijing.Haidian | 3 |
+ 2018-10-03 14:38:06.500 | 11.50000 | 221 | 0.35000 | Beijing.Haidian | 3 |
+ 2018-10-03 14:38:04.000 | 10.20000 | 220 | 0.23000 | Beijing.Chaoyang | 3 |
+ 2018-10-03 14:38:16.650 | 10.30000 | 218 | 0.25000 | Beijing.Chaoyang | 3 |
+ 2018-10-03 14:38:05.000 | 10.30000 | 219 | 0.31000 | Beijing.Chaoyang | 2 |
+ 2018-10-03 14:38:15.000 | 12.60000 | 218 | 0.33000 | Beijing.Chaoyang | 2 |
+ 2018-10-03 14:38:16.800 | 12.30000 | 221 | 0.31000 | Beijing.Chaoyang | 2 |
+Query OK, 9 row(s) in set (0.002022s)
+```
+
+Wildcards support table-name prefixes; the two following SQL statements both return all columns:
+
+```mysql
+SELECT * FROM d1001;
+SELECT d1001.* FROM d1001;
+```
+
+In a JOIN query, the results returned by \* with a prefix and \* without a prefix are different.
\* without a prefix returns all column data of all tables (excluding tags), while \* with a prefix returns only the column data of the corresponding table.
+
+```mysql
+taos> SELECT * FROM d1001, d1003 WHERE d1001.ts=d1003.ts;
+ ts | current | voltage | phase | ts | current | voltage | phase |
+==================================================================================================================================
+ 2018-10-03 14:38:05.000 | 10.30000 | 219 | 0.31000 | 2018-10-03 14:38:05.000 | 10.80000 | 223 | 0.29000 |
+Query OK, 1 row(s) in set (0.017385s)
+```
+```mysql
+taos> SELECT d1001.* FROM d1001,d1003 WHERE d1001.ts = d1003.ts;
+ ts | current | voltage | phase |
+======================================================================================
+ 2018-10-03 14:38:05.000 | 10.30000 | 219 | 0.31000 |
+Query OK, 1 row(s) in set (0.020443s)
+```
+
+Some SQL functions support wildcard arguments. The difference is that the `count(*)` function returns a single column, while the `first`, `last` and `last_row` functions return all columns.
+
+```mysql
+taos> SELECT COUNT(*) FROM d1001;
+ count(*) |
+========================
+ 3 |
+Query OK, 1 row(s) in set (0.001035s)
+```
+
+```mysql
+taos> SELECT FIRST(*) FROM d1001;
+ first(ts) | first(current) | first(voltage) | first(phase) |
+=========================================================================================
+ 2018-10-03 14:38:05.000 | 10.30000 | 219 | 0.31000 |
+Query OK, 1 row(s) in set (0.000849s)
+```
+
+#### Tag Column
+
+Since version 2.0.14, *tag columns* can be specified in queries on ordinary tables; the values of the tag columns are returned together with the data of the other ordinary columns.
+
+```mysql
+taos> SELECT location, groupid, current FROM d1001 LIMIT 2;
+ location | groupid | current |
+======================================================================
+ Beijing.Chaoyang | 2 | 10.30000 |
+ Beijing.Chaoyang | 2 | 12.60000 |
+Query OK, 2 row(s) in set (0.003112s)
+```
+
+Note: The wildcard \* of ordinary tables does not cover *tag columns*.
+
+#### Obtain the de-duplicated values of a tag column
+
+Since version 2.0.15, the `DISTINCT` keyword can be specified when querying tag columns of STables; it returns all non-duplicate values of the given tag columns.
+
+```mysql
+SELECT DISTINCT tag_name FROM stb_name;
+```
+
+Note: At present, the `DISTINCT` keyword only supports de-duplication of the tag columns of STables; it cannot be used on ordinary columns.
+
+#### Column names in the result set
+
+In the `SELECT` clause, if no column name is set for the result set, the column name defaults to the expression used in the `SELECT` clause. In addition, `AS` can be used to rename the columns of the returned result set. For example:
+
+```mysql
+taos> SELECT ts, ts AS primary_key_ts FROM d1001;
+ ts | primary_key_ts |
+====================================================
+ 2018-10-03 14:38:05.000 | 2018-10-03 14:38:05.000 |
+ 2018-10-03 14:38:15.000 | 2018-10-03 14:38:15.000 |
+ 2018-10-03 14:38:16.800 | 2018-10-03 14:38:16.800 |
+Query OK, 3 row(s) in set (0.001191s)
+```
+
+However, renaming a single column is not supported for `first(*)`, `last(*)` and `last_row(*)`.
+
+#### Implicit result columns
+
+`select_exprs` can be the name of a column belonging to a table, a column-based function expression, or a calculation formula, with an upper limit of 256 expressions. When an `interval` or `group by tags` clause is used, the timestamp column (the first column) and the tag columns named in the `group by` clause are forced to be returned in the final result. Later versions may support turning off the output of implicit columns in the `group by` clause, so that the column output is fully controlled by the SELECT clause.
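+
+For instance, a minimal sketch assuming the meters STable used elsewhere in this chapter: the query below selects only the average voltage, yet the location tag named in group by is returned as an implicit column alongside it.
+
+```mysql
+SELECT AVG(voltage) FROM meters GROUP BY location;
+```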
+
+#### List of tables (STables)
+
+The `FROM` keyword can be followed by a list of tables (STables) or by the result of a subquery.
+
+If a table is not in the user's current database, the database name can be prefixed to the table name to specify which database the table belongs to, e.g. `power.d1001` for cross-database access.
+
+```mysql
+SELECT * FROM power.d1001;
+------------------------------
+USE power;
+SELECT * FROM d1001;
+```
+
+#### Special Functions
+
+Some special query functions can be executed without a FROM clause. Obtain the current database with DATABASE():
+
+```mysql
+taos> SELECT DATABASE();
+ database() |
+=================================
+ power |
+Query OK, 1 row(s) in set (0.000079s)
+```
+
+If no default database was specified at login and the `USE` command has not been used to switch databases, `NULL` is returned.
+
+```mysql
+taos> SELECT DATABASE();
+ database() |
+=================================
+ NULL |
+Query OK, 1 row(s) in set (0.000184s)
+```
+
+Get the server and client version numbers:
+
+```mysql
+taos> SELECT CLIENT_VERSION();
+ client_version() |
+===================
+ 2.0.0.0 |
+Query OK, 1 row(s) in set (0.000070s)
+
+taos> SELECT SERVER_VERSION();
+ server_version() |
+===================
+ 2.0.0.0 |
+Query OK, 1 row(s) in set (0.000077s)
+```
+
+A server state detection statement: if the server is healthy, it returns a number (for example, 1); if the server is in an abnormal state, it returns an error code. This syntax is compatible with connection pools checking the status of TDengine and with third-party tools checking the status of the database server, and it avoids the dropped connections a connection pool can suffer when an unsuitable heartbeat-detection SQL statement is used.
+
+```mysql
+taos> SELECT SERVER_STATUS();
+ server_status() |
+==================
+ 1 |
+Query OK, 1 row(s) in set (0.000074s)
+
+taos> SELECT SERVER_STATUS() AS status;
+ status |
+==============
+ 1 |
+Query OK, 1 row(s) in set (0.000081s)
+```
+
+#### Special keywords in TAOS SQL
+
+> `TBNAME`: can be regarded as a special tag in a STable query, representing the name of the sub-table a row comes from
+>
+> `_c0`: represents the first column of a table (STable)
+
+#### Tips
+
+Get all sub-table names and related tag information of a STable:
+
+```mysql
+SELECT TBNAME, location FROM meters;
+```
+
+Count the number of sub-tables under a STable:
+
+```mysql
+SELECT COUNT(TBNAME) FROM meters;
+```
+
+Both queries above only support filtering on TAGS in the WHERE clause. For example:
+
+```mysql
+taos> SELECT TBNAME, location FROM meters;
+ tbname | location |
+==================================================================
+ d1004 | Beijing.Haidian |
+ d1003 | Beijing.Haidian |
+ d1002 | Beijing.Chaoyang |
+ d1001 | Beijing.Chaoyang |
+Query OK, 4 row(s) in set (0.000881s)
+
+taos> SELECT COUNT(tbname) FROM meters WHERE groupId > 2;
+ count(tbname) |
+========================
+ 2 |
+Query OK, 1 row(s) in set (0.001091s)
+```
+
+- You can use \* to return all columns, or specify column names. Arithmetic ("four") operations can be performed on numeric columns, and output columns can be given names.
+- The `WHERE` clause can use various logical operators to filter numeric values, or wildcards to filter strings.
+- The output is sorted by default in ascending order of the timestamp in the first column; descending order can be specified instead (\_c0 refers to the first column, the timestamp). Using ORDER BY to sort any other field is illegal.
+- The LIMIT parameter controls the number of output records, and OFFSET specifies from which record the output starts. LIMIT/OFFSET is applied to the result set after ORDER BY.
+- Output can be redirected to a specified file with `>>`.
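+
+A small sketch of the ordering and paging rules above, reusing the d1001 table from the earlier examples (output omitted); the query skips the most recent record and returns the next two in descending time order:
+
+```mysql
+SELECT * FROM d1001 ORDER BY ts DESC LIMIT 2 OFFSET 1;
+```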
+
+#### Supported Filtering Operations
+
+| **Operation** | **Note**                      | **Applicable Data Types**           |
+| ------------- | ----------------------------- | ----------------------------------- |
+| >             | larger than                   | **timestamp** and all numeric types |
+| <             | smaller than                  | **timestamp** and all numeric types |
+| >=            | larger than or equal to       | **timestamp** and all numeric types |
+| <=            | smaller than or equal to      | **timestamp** and all numeric types |
+| =             | equal to                      | all types                           |
+| <>            | not equal to                  | all types                           |
+| BETWEEN AND   | within a certain range        | **timestamp** and all numeric types |
+| %             | match with any char sequence  | **binary** **nchar**                |
+| _             | match with a single char      | **binary** **nchar**                |
+
+1. To filter on the ranges of multiple fields at the same time, use the keyword AND to combine the query conditions. Query filtering across different columns combined with OR is not supported at the moment.
+2. For filtering on a single field: if it is a time filter, only one condition can be set per statement; for other (ordinary) columns and tag columns, however, the OR keyword can be used for combined-condition filtering, for example: ((value > 20 AND value < 30) OR (value < 12)).
+3. Since version 2.0.17, condition filtering supports the BETWEEN AND syntax. For example, WHERE col2 BETWEEN 1.5 AND 3.25 means the query condition "1.5 ≤ col2 ≤ 3.25".
+
+### SQL Example
+
+- For example, table tb1 is created with the following statement
+
+    ```mysql
+    CREATE TABLE tb1 (ts TIMESTAMP, col1 INT, col2 FLOAT, col3 BINARY(50));
+    ```
+
+- Query all records of the last hour of tb1
+
+    ```mysql
+    SELECT * FROM tb1 WHERE ts >= NOW - 1h;
+    ```
+
+- Query tb1 from 2018-06-01 08:00:00.000 to 2018-06-02 08:00:00.000 for records whose col3 string ends in 'nny', with the result in descending order of timestamp:
+
+    ```mysql
+    SELECT * FROM tb1 WHERE ts > '2018-06-01 08:00:00.000' AND ts <= '2018-06-02 08:00:00.000' AND col3 LIKE '%nny' ORDER BY ts DESC;
+    ```
+
+- Query the sum of col1 and col2, named complex, for records whose timestamp is greater than 2018-06-01 08:00:00.000 and whose col2 is greater than 1.2; output only 10 records, starting from the 5th
+
+    ```mysql
+    SELECT (col1 + col2) AS 'complex' FROM tb1 WHERE ts > '2018-06-01 08:00:00.000' AND col2 > 1.2 LIMIT 10 OFFSET 5;
+    ```
+
+- Count the records of the past 10 minutes whose col2 value is greater than 3.14, and output the result to the file /home/testoutpu.csv
+
+    ```mysql
+    SELECT COUNT(*) FROM tb1 WHERE ts >= NOW - 10m AND col2 > 3.14 >> /home/testoutpu.csv;
+    ```
+
+## SQL Functions
+
+TDengine supports aggregations over data; the supported functions are listed below:
+
+- **COUNT**
+
+    ```mysql
+    SELECT COUNT([*|field_name]) FROM tb_name [WHERE clause];
+    ```
+    Function: count the number of rows in a table/STable, or the number of non-NULL values in a column.
+
+    Return Data Type: long integer INT64.
+
+    Applicable Fields: all fields.
+
+    Applied to: **table, STable**.
+
+    Note:
+    1. You can use \* instead of a specific field; COUNT(\*) returns the total number of records.
+    2. For fields of the same table that contain no NULL values, the query results are identical.
+    3. If the target is a specific column, return the number of records with non-NULL values in that column.
+
+    Example:
+
+    ```mysql
+    taos> SELECT COUNT(*), COUNT(voltage) FROM meters;
+     count(*) | count(voltage) |
+    ================================================
+     9 | 9 |
+    Query OK, 1 row(s) in set (0.004475s)
+
+    taos> SELECT COUNT(*), COUNT(voltage) FROM d1001;
+     count(*) | count(voltage) |
+    ================================================
+     3 | 3 |
+    Query OK, 1 row(s) in set (0.001075s)
+    ```
+
+- **AVG**
+
+    ```mysql
+    SELECT AVG(field_name) FROM tb_name [WHERE clause];
+    ```
+    Function: return the average value of a column in a table/STable.
+
+    Return Data Type: double.
+
+    Applicable Fields: all types except timestamp, binary, nchar, bool.
+
+    Applied to: **table, STable**.
+
+    Example:
+
+    ```mysql
+    taos> SELECT AVG(current), AVG(voltage), AVG(phase) FROM meters;
+     avg(current) | avg(voltage) | avg(phase) |
+    ====================================================================================
+     11.466666751 | 220.444444444 | 0.293333333 |
+    Query OK, 1 row(s) in set (0.004135s)
+
+    taos> SELECT AVG(current), AVG(voltage), AVG(phase) FROM d1001;
+     avg(current) | avg(voltage) | avg(phase) |
+    ====================================================================================
+     11.733333588 | 219.333333333 | 0.316666673 |
+    Query OK, 1 row(s) in set (0.000943s)
+    ```
+
+- **TWA**
+
+    ```mysql
+    SELECT TWA(field_name) FROM tb_name WHERE clause;
+    ```
+
+    Function: time-weighted average; returns the time-weighted average of a column in a table over a period of time.
+
+    Return Data Type: double.
+
+    Applicable Fields: all types except timestamp, binary, nchar, bool.
+
+    Applied to: **table**.
+
+- **SUM**
+
+    ```mysql
+    SELECT SUM(field_name) FROM tb_name [WHERE clause];
+    ```
+
+    Function: return the sum of the values of a column in a table/STable.
+
+    Return Data Type: long integer INT64 and Double.
+
+    Applicable Fields: all types except timestamp, binary, nchar, bool.
+
+    Applied to: **table, STable**.
+
+    Example:
+
+    ```mysql
+    taos> SELECT SUM(current), SUM(voltage), SUM(phase) FROM meters;
+     sum(current) | sum(voltage) | sum(phase) |
+    ================================================================================
+     103.200000763 | 1984 | 2.640000001 |
+    Query OK, 1 row(s) in set (0.001702s)
+
+    taos> SELECT SUM(current), SUM(voltage), SUM(phase) FROM d1001;
+     sum(current) | sum(voltage) | sum(phase) |
+    ================================================================================
+     35.200000763 | 658 | 0.950000018 |
+    Query OK, 1 row(s) in set (0.000980s)
+    ```
+
+- **STDDEV**
+
+    ```mysql
+    SELECT STDDEV(field_name) FROM tb_name [WHERE clause];
+    ```
+
+    Function: the mean square deviation of a column in a table.
+
+    Return Data Type: Double.
+
+    Applicable Fields: all types except timestamp, binary, nchar, bool.
+
+    Applied to: **table**.
(also supported for **STable** since version 2.0.15.1).
+
+    Example:
+
+    ```mysql
+    taos> SELECT STDDEV(current) FROM d1001;
+     stddev(current) |
+    ============================
+     1.020892909 |
+    Query OK, 1 row(s) in set (0.000915s)
+    ```
+
+- **LEASTSQUARES**
+
+    ```mysql
+    SELECT LEASTSQUARES(field_name, start_val, step_val) FROM tb_name [WHERE clause];
+    ```
+    Function: fit the values of a column in a table to a straight-line equation of the primary key (timestamp); start_val is the initial value of the independent variable, and step_val is the step size of the independent variable.
+
+    Return Data Type: string expression (slope, intercept).
+
+    Applicable Fields: all types except timestamp, binary, nchar, bool.
+
+    Note: the independent variable is the timestamp, and the dependent variable is the value of the column.
+
+    Applied to: **table**.
+
+    Example:
+
+    ```mysql
+    taos> SELECT LEASTSQUARES(current, 1, 1) FROM d1001;
+     leastsquares(current, 1, 1) |
+    =====================================================
+     {slop:1.000000, intercept:9.733334} |
+    Query OK, 1 row(s) in set (0.000921s)
+    ```
+
+### Selector Functions
+
+- **MIN**
+
+    ```mysql
+    SELECT MIN(field_name) FROM {tb_name | stb_name} [WHERE clause];
+    ```
+    Function: return the minimum value of a specific column in a table/STable.
+
+    Return Data Type: same as the applicable fields.
+
+    Applicable Fields: all types except timestamp, binary, nchar, bool.
+
+    Example:
+
+    ```mysql
+    taos> SELECT MIN(current), MIN(voltage) FROM meters;
+     min(current) | min(voltage) |
+    ======================================
+     10.20000 | 218 |
+    Query OK, 1 row(s) in set (0.001765s)
+
+    taos> SELECT MIN(current), MIN(voltage) FROM d1001;
+     min(current) | min(voltage) |
+    ======================================
+     10.30000 | 218 |
+    Query OK, 1 row(s) in set (0.000950s)
+    ```
+
+- **MAX**
+
+    ```mysql
+    SELECT MAX(field_name) FROM { tb_name | stb_name } [WHERE clause];
+    ```
+
+    Function: return the maximum value of a specific column in a table/STable.
+
+    Return Data Type: same as the applicable fields.
+
+    Applicable Fields: all types except timestamp, binary, nchar, bool.
+
+    Example:
+
+    ```mysql
+    taos> SELECT MAX(current), MAX(voltage) FROM meters;
+     max(current) | max(voltage) |
+    ======================================
+     13.40000 | 223 |
+    Query OK, 1 row(s) in set (0.001123s)
+
+    taos> SELECT MAX(current), MAX(voltage) FROM d1001;
+     max(current) | max(voltage) |
+    ======================================
+     12.60000 | 221 |
+    Query OK, 1 row(s) in set (0.000987s)
+    ```
+
+- **FIRST**
+
+    ```mysql
+    SELECT FIRST(field_name) FROM { tb_name | stb_name } [WHERE clause];
+    ```
+
+    Function: the first non-NULL value written to a column in a table/STable.
+
+    Return Data Type: same as the applicable fields.
+
+    Applicable Fields: all types.
+
+    Note:
+    1. To return the first non-NULL value (that with the smallest timestamp) of every column, use FIRST(\*);
+    2. If a column in the result set consists entirely of NULL values, the returned result for that column is also NULL;
+    3. If all columns in the result set consist entirely of NULL values, no result is returned.
+
+    Example:
+
+    ```mysql
+    taos> SELECT FIRST(*) FROM meters;
+     first(ts) | first(current) | first(voltage) | first(phase) |
+    =========================================================================================
+     2018-10-03 14:38:04.000 | 10.20000 | 220 | 0.23000 |
+    Query OK, 1 row(s) in set (0.004767s)
+
+    taos> SELECT FIRST(current) FROM d1002;
+     first(current) |
+    =======================
+     10.20000 |
+    Query OK, 1 row(s) in set (0.001023s)
+    ```
+
+- **LAST**
+
+    ```mysql
+    SELECT LAST(field_name) FROM { tb_name | stb_name } [WHERE clause];
+    ```
+
+    Function: the last non-NULL value written to a column in a table/STable.
+
+    Return Data Type: same as the applicable fields.
+
+    Applicable Fields: all types.
+
+    Note:
+    1. To return the last non-NULL value (that with the largest timestamp) of every column, use LAST(\*);
+    2. If a column in the result set consists entirely of NULL values, the returned result for that column is also NULL; if all columns in the result set consist entirely of NULL values, no result is returned.
+
+    Example:
+
+    ```mysql
+    taos> SELECT LAST(*) FROM meters;
+     last(ts) | last(current) | last(voltage) | last(phase) |
+    ========================================================================================
+     2018-10-03 14:38:16.800 | 12.30000 | 221 | 0.31000 |
+    Query OK, 1 row(s) in set (0.001452s)
+
+    taos> SELECT LAST(current) FROM d1002;
+     last(current) |
+    =======================
+     10.30000 |
+    Query OK, 1 row(s) in set (0.000843s)
+    ```
+
+- **TOP**
+
+    ```mysql
+    SELECT TOP(field_name, K) FROM { tb_name | stb_name } [WHERE clause];
+    ```
+    Function: the largest k non-NULL values of a column in a table/STable. If more than k column values tie for the largest, the ones with smaller timestamps are returned.
+
+    Return Data Type: same as the applicable fields.
+
+    Applicable Fields: all types except timestamp, binary, nchar, bool.
+
+    Note:
+    1. The range of *k* is 1 ≤ *k* ≤ 100;
+    2. The system also returns the timestamp column associated with each record.
+
+    Example:
+
+    ```mysql
+    taos> SELECT TOP(current, 3) FROM meters;
+     ts | top(current, 3) |
+    =================================================
+     2018-10-03 14:38:15.000 | 12.60000 |
+     2018-10-03 14:38:16.600 | 13.40000 |
+     2018-10-03 14:38:16.800 | 12.30000 |
+    Query OK, 3 row(s) in set (0.001548s)
+
+    taos> SELECT TOP(current, 2) FROM d1001;
+     ts | top(current, 2) |
+    =================================================
+     2018-10-03 14:38:15.000 | 12.60000 |
+     2018-10-03 14:38:16.800 | 12.30000 |
+    Query OK, 2 row(s) in set (0.000810s)
+    ```
+
+- **BOTTOM**
+
+    ```mysql
+    SELECT BOTTOM(field_name, K) FROM { tb_name | stb_name } [WHERE clause];
+    ```
+    Function: the smallest k non-NULL values of a column in a table/STable. If more than k column values tie for the smallest, the ones with smaller timestamps are returned.
+
+    Return Data Type: same as the applicable fields.
+
+    Applicable Fields: all types except timestamp, binary, nchar, bool.
+
+    Note:
+    1. The range of *k* is 1 ≤ *k* ≤ 100;
+    2. The system also returns the timestamp column associated with each record.
+
+    Example:
+
+    ```mysql
+    taos> SELECT BOTTOM(voltage, 2) FROM meters;
+     ts | bottom(voltage, 2) |
+    ===============================================
+     2018-10-03 14:38:15.000 | 218 |
+     2018-10-03 14:38:16.650 | 218 |
+    Query OK, 2 row(s) in set (0.001332s)
+
+    taos> SELECT BOTTOM(current, 2) FROM d1001;
+     ts | bottom(current, 2) |
+    =================================================
+     2018-10-03 14:38:05.000 | 10.30000 |
+     2018-10-03 14:38:16.800 | 12.30000 |
+    Query OK, 2 row(s) in set (0.000793s)
+    ```
+
+- **PERCENTILE**
+
+    ```mysql
+    SELECT PERCENTILE(field_name, P) FROM { tb_name } [WHERE clause];
+    ```
+    Function: the percentile of the values of a column in a table.
+
+    Return Data Type: Double.
+
+    Applicable Fields: all types except timestamp, binary, nchar, bool.
+
+    Note: The range of P is 0 ≤ P ≤ 100; P = 0 is equivalent to MIN, and P = 100 to MAX.
+
+    Example:
+
+    ```mysql
+    taos> SELECT PERCENTILE(current, 20) FROM d1001;
+     percentile(current, 20) |
+    ============================
+     11.100000191 |
+    Query OK, 1 row(s) in set (0.000787s)
+    ```
+
+- **APERCENTILE**
+
+    ```mysql
+    SELECT APERCENTILE(field_name, P) FROM { tb_name | stb_name } [WHERE clause];
+    ```
+    Function: the percentile of the values of a column in a table; similar to the PERCENTILE function, but returns an approximate result.
+
+    Return Data Type: Double.
+
+    Applicable Fields: all types except timestamp, binary, nchar, bool.
+
+    Note: The range of *P* is 0 ≤ *P* ≤ 100; *P* = 0 is equivalent to MIN, and *P* = 100 to MAX. The APERCENTILE function is recommended, as it performs far better than the PERCENTILE function.
+
+- **LAST_ROW**
+
+    ```mysql
+    SELECT LAST_ROW(field_name) FROM { tb_name | stb_name };
+    ```
+    Function: return the last record of a table (STable).
+
+    Return Data Type: same as the applicable fields.
+
+    Applicable Fields: all types.
+
+    Note: Unlike the LAST function, LAST_ROW does not support time-range restriction and always returns the last record.
+
+    Example:
+
+    ```mysql
+    taos> SELECT LAST_ROW(current) FROM meters;
+     last_row(current) |
+    =======================
+     12.30000 |
+    Query OK, 1 row(s) in set (0.001238s)
+
+    taos> SELECT LAST_ROW(current) FROM d1002;
+     last_row(current) |
+    =======================
+     10.30000 |
+    Query OK, 1 row(s) in set (0.001042s)
+    ```
+
+### Computing Functions
+
+- **DIFF**
+
+    ```mysql
+    SELECT DIFF(field_name) FROM tb_name [WHERE clause];
+    ```
+    Function: return the difference between the value of a column and its value in the previous row.
+
+    Return Data Type: same as the applicable fields.
+
+    Applicable Fields: all types except timestamp, binary, nchar, bool.
+
+    Note: the number of output rows is the total number of rows in the range minus one; the first row has no output.
+
+    Example:
+
+    ```mysql
+    taos> SELECT DIFF(current) FROM d1001;
+     ts | diff(current) |
+    =================================================
+     2018-10-03 14:38:15.000 | 2.30000 |
+     2018-10-03 14:38:16.800 | -0.30000 |
+    Query OK, 2 row(s) in set (0.001162s)
+    ```
+
+- **SPREAD**
+
+    ```mysql
+    SELECT SPREAD(field_name) FROM { tb_name | stb_name } [WHERE clause];
+    ```
+    Function: return the difference between the maximum and the minimum value of a column in a table/STable.
+
+    Return Data Type: Double.
+
+    Applicable Fields: all types except binary, nchar, bool.
+
+    Note: it can also be applied to a TIMESTAMP field, in which case it indicates the time range covered by the records.
+
+    Example:
+
+    ```mysql
+    taos> SELECT SPREAD(voltage) FROM meters;
+     spread(voltage) |
+    ============================
+     5.000000000 |
+    Query OK, 1 row(s) in set (0.001792s)
+
+    taos> SELECT SPREAD(voltage) FROM d1001;
+     spread(voltage) |
+    ============================
+     3.000000000 |
+    Query OK, 1 row(s) in set (0.000836s)
+    ```
+
+- **Four Operations**
+
+    ```mysql
+    SELECT field_name [+|-|*|/|%][Value|field_name] FROM { tb_name | stb_name } [WHERE clause];
+    ```
+    Function: the results of addition, subtraction, multiplication, division and remainder operations on the values of one column, or across multiple columns, in a table/STable.
+
+    Return Data Type: Double.
+
+    Applicable Fields: all types except timestamp, binary, nchar, bool.
+
+    Note:
+
+    1. Calculation across two or more columns is supported, and calculation precedence can be controlled with parentheses ();
+    2. NULL fields do not participate in the calculation: if a row involved in a calculation contains NULL, the result for that row is NULL.
+
+## Time-dimension Aggregation
+
+TDengine supports aggregating by time intervals. Data in a table can be partitioned by intervals and aggregated to generate results. For example, a temperature sensor collects data once per second, but the average temperature needs to be queried every 10 minutes. This kind of aggregation is suitable for downsampling, and the syntax is as follows:
+
+```mysql
+SELECT function_list FROM tb_name
+  [WHERE where_condition]
+  INTERVAL (interval [, offset])
+  [SLIDING sliding]
+  [FILL ({NONE | VALUE | PREV | NULL | LINEAR | NEXT})]
+
+SELECT function_list FROM stb_name
+  [WHERE where_condition]
+  INTERVAL (interval [, offset])
+  [SLIDING sliding]
+  [FILL ({ VALUE | PREV | NULL | LINEAR | NEXT})]
+  [GROUP BY tags]
+```
+
+- The length of the aggregation interval is specified by the keyword INTERVAL; the minimum interval is 10 milliseconds (10a), and an offset is supported (the offset must be less than the interval). In aggregation queries, the aggregator and selector functions that can be executed simultaneously are limited to single-output functions: count, avg, sum, stddev, leastsquares, percentile, min, max, first, last. Functions with multiple output rows (such as top, bottom, diff, and the four operations) cannot be used.
+
+- The WHERE clause specifies the start and end time of the query and other filter conditions.
+
+- The FILL clause specifies the fill mode used when data is missing in an interval. The applicable fill modes are:
+
+  1. No fill: NONE (the default fill mode).
+  2. VALUE fill: fill with a fixed value, which must be specified, e.g. fill(VALUE, 1.23).
+  3. NULL fill: fill the data with NULL, e.g. fill(NULL).
+  4. PREV fill: fill the data with the previous non-NULL value, e.g. fill(PREV).
+  5. NEXT fill: fill the data with the next non-NULL value, e.g. fill(NEXT).
+
+Note:
+
+  1. When a FILL clause is used, a large number of filled outputs may be generated, so be sure to specify the time range of the query. For each query, the system can return no more than 10 million interpolated results.
+  2. In time-dimension aggregation, the returned results are in strictly monotonically increasing time-series order.
+  3. If the query object is a STable, the aggregate functions act on the data of all tables under the STable that satisfy the value filters.
If no group by clause is used in the query, the returned results are in strictly monotonically increasing time-series order; if a group by clause is used to group the results, the results within each group are not strictly monotonically increasing in time-series order.
+
+Example: the table-creation statement for the smart-meter database is as follows:
+
+```mysql
+CREATE TABLE meters (ts TIMESTAMP, current FLOAT, voltage INT, phase FLOAT) TAGS (location BINARY(64), groupId INT);
+```
+
+From the data collected by the smart meters, the average, maximum and median current of the past 24 hours are calculated in 10-minute intervals, and the trend of the current over time is fitted to a straight line. Where no value was calculated, it is filled with the previous non-NULL value. The query statement is as follows:
+
+```mysql
+SELECT AVG(current), MAX(current), LEASTSQUARES(current, start_val, step_val), PERCENTILE(current, 50) FROM meters
+  WHERE ts>=NOW-1d
+  INTERVAL(10m)
+  FILL(PREV);
+```
+
+## TAOS SQL Boundary Restrictions
+
+- The maximum length of a database name is 32
+- The maximum length of a table name is 192, and the maximum length of each data row is 16k characters
+- The maximum length of a column name is 64; the maximum number of columns allowed is 1024, and the minimum is 2. The first column must be a timestamp
+- The maximum number of tags allowed is 128, the minimum is 1, and the total length of the tags cannot exceed 16k characters
+- The maximum length of a SQL statement is 65480 characters, but it can be modified via the system configuration parameter maxSQLLength, up to a maximum of 1M
+- The numbers of databases, STables and tables are not limited by the system, only by system resources
+
+## Other TAOS SQL Conventions
+
+**Restrictions on group by**
+
+TAOS SQL supports group by on tags, tbname and ordinary columns, with the restriction that only one column can be used and that column must have fewer than 100,000 unique values.
+
+**Restrictions on join operations**
+
+TAOS SQL supports joining two tables on their primary-key timestamps; for the time being, it does not support the four operations on tables after aggregation.
+
+**Availability of IS NOT NULL**
+
+IS NOT NULL supports all types of columns. The non-NULL expression `<> ""` applies only to columns of non-numeric types.
 diff --git a/documentation20/en/13.faq/docs.md b/documentation20/en/13.faq/docs.md new file mode 100644 index 0000000000000000000000000000000000000000..05507e26e5ab84a01e19d9ecced5e0464c1411f3 --- /dev/null +++ b/documentation20/en/13.faq/docs.md @@ -0,0 +1,161 @@
+# FAQ
+
+Tutorials & FAQ
+
+## 0. How to report an issue?
+
+If the contents of this FAQ cannot help you and you need the technical support and assistance of the TDengine team, please package the contents of the following two directories:
+
+1. /var/log/taos (if the default path has not been modified)
+2. /etc/taos
+
+Provide the necessary description of the problem, including the version of TDengine used, the platform environment, the operation being performed, the symptoms of the problem and the approximate time it occurred, and submit an issue on [GitHub](https://github.com/taosdata/TDengine).
+
+To ensure there is enough debug information, if the problem can be reproduced, please modify the /etc/taos/taos.cfg file, add the line "debugFlag 135" at the end (without the quotation marks), then restart taosd, reproduce the problem, and submit the logs. You can also temporarily set the log level of taosd through the following SQL statement.
+
+```
+alter dnode <dnode_id> debugFlag 135;
+```
+
+However, when the system is running normally, please set debugFlag to 131; otherwise a large amount of log information will be generated and system efficiency will be reduced.
+
+## 1. What should I pay attention to when upgrading TDengine from older versions to 2.0 and above? ☆☆☆
+
+Version 2.0 is a complete refactoring of the previous version, and the configuration and data files are incompatible. Be sure to do the following before upgrading:
+
+1. Delete the configuration file: execute `sudo rm -rf /etc/taos/taos.cfg`
+2. Delete the log files: execute `sudo rm -rf /var/log/taos/`
+3. After making sure the data is no longer needed, delete the data files: execute `sudo rm -rf /var/lib/taos/`
+4. Install the latest stable version of TDengine
+5. If you need to migrate data, or if the data files are corrupted, please contact the official technical support team of TAOS Data for assistance
+
+## 2. What can I do when I encounter the error "Unable to establish connection" on Windows?
+
+See the [technical blog](https://www.taosdata.com/blog/2019/12/03/jdbcdriver%E6%89%BE%E4%B8%8D%E5%88%B0%E5%8A%A8%E6%80%81%E9%93%BE%E6%8E%A5%E5%BA%93/) for this issue.
+
+## 3. Why do I get "more dnodes are needed" when creating a table?
+
+See the [technical blog](https://www.taosdata.com/blog/2019/12/03/%E5%88%9B%E5%BB%BA%E6%95%B0%E6%8D%AE%E8%A1%A8%E6%97%B6%E6%8F%90%E7%A4%BAmore-dnodes-are-needed/) for this issue.
+
+## 4. How do I generate a core file when TDengine crashes?
+
+See the [technical blog](https://www.taosdata.com/blog/2019/12/06/tdengine-crash%E6%97%B6%E7%94%9F%E6%88%90core%E6%96%87%E4%BB%B6%E7%9A%84%E6%96%B9%E6%B3%95/) for this issue.
+
+## 5. What should I do if I encounter the error "Unable to establish connection"?
+
+When the client encounters a connection failure, please check the following step by step:
+
+1. Check the network environment:
+   - Cloud server: check whether the security group of the cloud server opens access to TCP/UDP ports 6030-6042
+   - Local virtual machine: check whether the network can be pinged, and try to avoid using localhost as the hostname
+   - Corporate server: if you are in a NAT network environment, be sure to check whether the server can return messages to the client
+2. Make sure that the client and server version numbers are exactly the same; the open-source Community Edition and the Enterprise Edition cannot be mixed.
+3. On the server, execute `systemctl status taosd` to check the running status of *taosd*. If it is not running, start *taosd*.
+4. Verify that the correct server FQDN (Fully Qualified Domain Name, obtainable by executing the Linux command `hostname -f` on the server) is specified when the client connects. FQDN configuration reference: "[All about FQDN of TDengine](https://www.taosdata.com/blog/2020/09/11/1824.html)".
+5. Ping the server FQDN. If there is no response, please check your network, your DNS settings, or the system hosts file of the computer where the client is located.
+6. Check the firewall settings (`ufw status` on Ubuntu, `firewall-cmd --list-port` on CentOS) to confirm that TCP/UDP ports 6030-6042 are open.
+7. For JDBC (ODBC, Python, Go and other interfaces are similar) connections on Linux, make sure that libtaos.so is in the directory /usr/local/taos/driver, and that /usr/local/taos/driver is in the system library search path LD_LIBRARY_PATH.
+8. For JDBC, ODBC, Python, Go, etc.
connections on Windows, make sure that C:\TDengine\driver\taos.dll is in your system library search directory (it is recommended to place taos.dll in the directory C:\Windows\System32).
+9. If the connection issue still exists:
+   - On Linux, use the command-line tool nc to determine whether the TCP and UDP connections on the specified ports are unobstructed. Check whether a UDP port connection works: `nc -vuz {hostIP} {port}`. Check whether the server-side TCP port is listening: `nc -l {port}`. Check whether the client-side TCP connection works: `nc {hostIP} {port}`.
+   - On Windows, use the PowerShell command `Test-NetConnection -ComputerName {fqdn} -Port {port}` to check whether the server-side port can be reached.
+10. You can also use the network connectivity detection built into the taos program to verify whether the specified port connections between server and client are unobstructed (both TCP and UDP): [TDengine's Built-in Network Detection Tool Use Guide](https://www.taosdata.com/blog/2020/09/08/1816.html).
+
+## 6. What to do if I encounter the error "Unexpected generic error in RPC" or "TDengine error: Unable to resolve FQDN"?
+
+This error occurs because the client or a data node cannot resolve the FQDN (Fully Qualified Domain Name). For the TAOS shell or client applications, check the following:
+
+1. Verify that the FQDN of the server you are connecting to is correct. FQDN configuration reference: "[All about FQDN of TDengine](https://www.taosdata.com/blog/2020/09/11/1824.html)".
+2. If the network is configured with a DNS server, check that it is working properly.
+3. If the network does not have a DNS server configured, check the hosts file of the machine where the client is located to see whether the FQDN is configured with the correct IP address.
+4. If the network configuration is OK, the machine where the client is located must be able to ping the configured FQDN; otherwise the client cannot connect to the server.
+
+## 7. Although the syntax is correct, why do I still get the "Invalid SQL" error?
+
+If you have confirmed that the syntax is correct, for versions older than 2.0, please check whether the SQL statement length exceeds 64K; if it does, this error is also returned.
+
+## 8. Are "validation queries" supported?
+
+TDengine does not yet have a dedicated set of validation queries. However, it is recommended to use the database "log" monitored by the system.
+
+## 9. Can I delete or update a record?
+
+TDengine does not support deletion at present, and may support it in the future according to user requirements.
+
+Starting from version 2.0.8.0, TDengine supports updating written data. Using the update feature requires the UPDATE 1 parameter when creating the database; you can then use the INSERT INTO command to update data that has already been written with the same timestamp. The UPDATE parameter cannot be modified with the ALTER DATABASE command. In a database created without the UPDATE 1 parameter, writing data with the same timestamp does not modify the previously written data, and no error is reported.
+
+It should also be noted that when UPDATE is set to 0, data with the same timestamp sent later is discarded directly, but no error is reported, and the data is still counted in the affected rows (so the return value of an INSERT command cannot be used for timestamp duplicate checking). The main reason for this design is that TDengine regards the written data as a stream: regardless of whether the timestamps conflict or not, TDengine believes that the original device producing the data really did produce it. The UPDATE parameter only controls how such stream data is handled at persistence time: when UPDATE is 0, the data written first overrides the data written later; when UPDATE is 1, the data written later overrides the data written first. Which override relationship to choose depends on whether the first-written or the last-written data is expected in subsequent use and statistics.
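+
+For instance, a minimal sketch of enabling the update feature at database-creation time; the database name demo_db is an assumption:
+
+```mysql
+CREATE DATABASE demo_db UPDATE 1;
+```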
+
+## 10. How to create a table with more than 1024 columns?
+
+In version 2.0 and above, 1024 columns are supported by default; older versions of TDengine allowed a table to be created with at most 250 columns. In either case, if the limit is exceeded, it is recommended to logically split the wide table into several smaller ones according to the characteristics of the data.
+
+## 11. What is the most effective way to write data?
+
+Insert in batches. Each write statement can insert multiple records into one table, or into multiple tables, at the same time.
+
+## 12. How to solve the problem that Chinese characters in nchar data inserted on Windows are parsed into garbled characters?
+
+If there are Chinese characters in nchar data on Windows, first confirm that the region of the system is set to China (this can be set in the Control Panel); the taos client in cmd should then work normally. If you are developing a Java application in an IDE such as Eclipse or IntelliJ, confirm that the file encoding in the IDE is GBK (the default encoding type of Java), and then initialize the client configuration when creating the Connection. The specific statements are as follows:
+
+```JAVA
+Class.forName("com.taosdata.jdbc.TSDBDriver");
+Properties properties = new Properties();
+properties.setProperty(TSDBDriver.LOCALE_KEY, "UTF-8");
+Connection connection = DriverManager.getConnection(url, properties);
+```
+
+## 13. JDBC error: the executed SQL is not a DML or a DDL?
+
+Please update to the latest JDBC driver.
+
+```xml
+<dependency>
+  <groupId>com.taosdata.jdbc</groupId>
+  <artifactId>taos-jdbcdriver</artifactId>
+  <version>2.0.27</version>
+</dependency>
+```
+
+## 14. taos connect failed, reason: invalid timestamp
+
+The common cause is that the server time and the client time are not synchronized. This can be fixed by synchronizing with a time server (use the ntpdate command on Linux, and select automatic synchronization in the Windows time settings).
+
+## 15. Incomplete display of table names
+
+Due to the limited display width of the taos shell in the terminal, a relatively long table name may not be displayed completely. If operations are then carried out with the incompletely displayed table name, a Table does not exist error occurs. The workaround is to modify the setting option maxBinaryDisplayWidth in the taos.cfg file, or to directly enter the command `set max_binary_display_width 100`. Alternatively, use the \G parameter at the end of the command to adjust how the results are displayed.
+
+## 16. How to migrate data?
+
+TDengine uniquely identifies a machine by its hostname. When moving data files from machine A to machine B, pay attention to the following three points:
+
+- For versions 2.0.0.0 to 2.0.6.x, reconfigure machine B's hostname to machine A's.
+- For 2.0.7.0 and later versions, go to /var/lib/taos/dnode, fix the FQDN corresponding to the dnodeId in dnodeEps.json, and restart.
Make sure this file is identical on all machines.
+- The storage structures of versions 1.x and 2.x are incompatible; it is necessary to use migration tools, or your own application, to export and re-import the data.
+
+## 17. How to temporarily adjust the log level in the command-line program taos?
+
+For the convenience of debugging, since version 2.0.16 the command-line program taos has two new log-related commands:
+
+```mysql
+ALTER LOCAL flag_name flag_value;
+```
+
+This modifies the log level of a specific module in the current command-line program (it is only valid for the current command-line program; if taos is restarted, it needs to be set again):
+
+- flag_name can be: debugFlag, cDebugFlag, tmrDebugFlag, uDebugFlag, rpcDebugFlag
+- flag_value can be: 131 (output error and warning logs), 135 (output error, warning, and debug logs), 143 (output error, warning, debug, and trace logs)
+
+```mysql
+ALTER LOCAL RESETLOG;
+```
+
+This clears all log files generated by the client on the machine.
+
 diff --git a/documentation20/en/images/architecture/dnode.png b/documentation20/en/images/architecture/dnode.png new file mode 100644 index 0000000000000000000000000000000000000000..cea87dcccba5d2761996e5dde998022d86487eb9 Binary files /dev/null and b/documentation20/en/images/architecture/dnode.png differ diff --git a/documentation20/webdocs/assets/Picture2.png b/documentation20/en/images/architecture/message.png similarity index 100% rename from documentation20/webdocs/assets/Picture2.png rename to documentation20/en/images/architecture/message.png diff --git a/documentation20/en/images/architecture/modules.png b/documentation20/en/images/architecture/modules.png new file mode 100644 index 0000000000000000000000000000000000000000..10ae4703a6cbbf66afea325ce4c0f919f7769a07 Binary files /dev/null and b/documentation20/en/images/architecture/modules.png differ diff --git a/documentation20/webdocs/assets/stable.png b/documentation20/en/images/architecture/multi_tables.png similarity index 100% rename from documentation20/webdocs/assets/stable.png rename to documentation20/en/images/architecture/multi_tables.png diff --git a/documentation20/en/images/architecture/replica-forward.png b/documentation20/en/images/architecture/replica-forward.png new file mode 100644 index 0000000000000000000000000000000000000000..bf616e030b130603eceb5dccfd30b4a1dfa68ea5 Binary files /dev/null and b/documentation20/en/images/architecture/replica-forward.png differ diff --git a/documentation20/en/images/architecture/replica-master.png b/documentation20/en/images/architecture/replica-master.png new file mode 100644 index 0000000000000000000000000000000000000000..cb33f1ce98661563693215d8fc73b003235c7668 Binary files /dev/null and b/documentation20/en/images/architecture/replica-master.png differ diff --git a/documentation20/en/images/architecture/replica-restore.png b/documentation20/en/images/architecture/replica-restore.png new file mode 100644 index 0000000000000000000000000000000000000000..1558e5ed0108d23efdc6b5d9ea0e44a1dff45d28 Binary files /dev/null and b/documentation20/en/images/architecture/replica-restore.png differ diff --git a/documentation20/webdocs/assets/structure.png b/documentation20/en/images/architecture/structure.png similarity index 100% rename from documentation20/webdocs/assets/structure.png rename to documentation20/en/images/architecture/structure.png diff --git a/documentation20/en/images/architecture/vnode.png b/documentation20/en/images/architecture/vnode.png new
file mode 100644 index 0000000000000000000000000000000000000000..e6148d4907cf9a18bc52251f712d5c685651b7f5 Binary files /dev/null and b/documentation20/en/images/architecture/vnode.png differ diff --git a/documentation20/en/images/architecture/write_master.png b/documentation20/en/images/architecture/write_master.png new file mode 100644 index 0000000000000000000000000000000000000000..ff2dfc20bfc2ecf956a2aab1a8965a7bbcae4387 Binary files /dev/null and b/documentation20/en/images/architecture/write_master.png differ diff --git a/documentation20/en/images/architecture/write_slave.png b/documentation20/en/images/architecture/write_slave.png new file mode 100644 index 0000000000000000000000000000000000000000..cacb2cb6bcc4f4d934e979862387e1345bbac078 Binary files /dev/null and b/documentation20/en/images/architecture/write_slave.png differ diff --git a/documentation20/en/images/connections/add_datasource1.jpg b/documentation20/en/images/connections/add_datasource1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1f0f5110f312c57f3ec1788bbc02f04fac6ac142 Binary files /dev/null and b/documentation20/en/images/connections/add_datasource1.jpg differ diff --git a/documentation20/en/images/connections/add_datasource2.jpg b/documentation20/en/images/connections/add_datasource2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..fa7a83e00e96fae649910dff4edf5f5bdadd7850 Binary files /dev/null and b/documentation20/en/images/connections/add_datasource2.jpg differ diff --git a/documentation20/en/images/connections/add_datasource3.jpg b/documentation20/en/images/connections/add_datasource3.jpg new file mode 100644 index 0000000000000000000000000000000000000000..fc850ad08ff1174de972906842e0d5ee64e6e5cb Binary files /dev/null and b/documentation20/en/images/connections/add_datasource3.jpg differ diff --git a/documentation20/en/images/connections/add_datasource4.jpg b/documentation20/en/images/connections/add_datasource4.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3ba73e50d455111f8621f4165746078554c2d790 Binary files /dev/null and b/documentation20/en/images/connections/add_datasource4.jpg differ diff --git a/documentation20/en/images/connections/create_dashboard1.jpg b/documentation20/en/images/connections/create_dashboard1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3b83c3a1714e9e7540e0b06239ef7c1c4f63fe2c Binary files /dev/null and b/documentation20/en/images/connections/create_dashboard1.jpg differ diff --git a/documentation20/en/images/connections/create_dashboard2.jpg b/documentation20/en/images/connections/create_dashboard2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..fe5d768ac55254251e0290bf257178f5ff28f5a5 Binary files /dev/null and b/documentation20/en/images/connections/create_dashboard2.jpg differ diff --git a/documentation20/en/images/connections/import_dashboard1.jpg b/documentation20/en/images/connections/import_dashboard1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9d2ce7ed65eb0c2c729de50283b30491793493dc Binary files /dev/null and b/documentation20/en/images/connections/import_dashboard1.jpg differ diff --git a/documentation20/en/images/connections/import_dashboard2.jpg b/documentation20/en/images/connections/import_dashboard2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..94b09f0ee39552bb84f7ba1f65815ce2c9548b2d Binary files /dev/null and b/documentation20/en/images/connections/import_dashboard2.jpg differ diff 
--git a/documentation20/en/images/connector.png b/documentation20/en/images/connector.png new file mode 100644 index 0000000000000000000000000000000000000000..6030bd73f51123615eabacfa5d734918559ce3d9 Binary files /dev/null and b/documentation20/en/images/connector.png differ diff --git a/documentation20/en/images/eco_system.png b/documentation20/en/images/eco_system.png new file mode 100644 index 0000000000000000000000000000000000000000..bf8bf8f1e0a2311fc12202d712a8a2f9b8ce419b Binary files /dev/null and b/documentation20/en/images/eco_system.png differ diff --git a/documentation20/en/images/tdengine-jdbc-connector.png b/documentation20/en/images/tdengine-jdbc-connector.png new file mode 100644 index 0000000000000000000000000000000000000000..fdf1dd3fcc5ee222c4a8753efa2c95c5257314bf Binary files /dev/null and b/documentation20/en/images/tdengine-jdbc-connector.png differ diff --git a/documentation20/webdocs/assets/clip_image001-2474914.png b/documentation20/webdocs/assets/clip_image001-2474914.png deleted file mode 100644 index eb369b1567c860b772e1bfdad64ff17aaac2534d..0000000000000000000000000000000000000000 Binary files a/documentation20/webdocs/assets/clip_image001-2474914.png and /dev/null differ diff --git a/documentation20/webdocs/assets/clip_image001-2474939.png b/documentation20/webdocs/assets/clip_image001-2474939.png deleted file mode 100644 index 53f00deea3a484986a5681ec9d00d8ae02e88fec..0000000000000000000000000000000000000000 Binary files a/documentation20/webdocs/assets/clip_image001-2474939.png and /dev/null differ diff --git a/documentation20/webdocs/assets/clip_image001-2474961.png b/documentation20/webdocs/assets/clip_image001-2474961.png deleted file mode 100644 index 20ae8d6f7724a4bddcf8c7eb3809d468aa4223ed..0000000000000000000000000000000000000000 Binary files a/documentation20/webdocs/assets/clip_image001-2474961.png and /dev/null differ diff --git a/documentation20/webdocs/assets/clip_image001-2474987.png b/documentation20/webdocs/assets/clip_image001-2474987.png deleted file mode 100644 index 3d09f7fc28e7a1fb7e3bb2b9b2bc7c20895e8bb4..0000000000000000000000000000000000000000 Binary files a/documentation20/webdocs/assets/clip_image001-2474987.png and /dev/null differ diff --git a/documentation20/webdocs/assets/clip_image001.png b/documentation20/webdocs/assets/clip_image001.png deleted file mode 100644 index 78b6d06a9562b802e80f0ed5fdb8963b5e525589..0000000000000000000000000000000000000000 Binary files a/documentation20/webdocs/assets/clip_image001.png and /dev/null differ diff --git a/documentation20/webdocs/assets/fig1.png b/documentation20/webdocs/assets/fig1.png deleted file mode 100644 index af9b74e0d1a872b8d93f71842dc0063bc8a86092..0000000000000000000000000000000000000000 Binary files a/documentation20/webdocs/assets/fig1.png and /dev/null differ diff --git a/documentation20/webdocs/assets/fig2.png b/documentation20/webdocs/assets/fig2.png deleted file mode 100644 index 3bae70ba86964c3c341b72ea1d3af04201f7c6c1..0000000000000000000000000000000000000000 Binary files a/documentation20/webdocs/assets/fig2.png and /dev/null differ diff --git a/documentation20/webdocs/assets/image-20190707124650780.png b/documentation20/webdocs/assets/image-20190707124650780.png deleted file mode 100644 index 9ebcac863e862d8b240c86dec29be1ebe7aa50f0..0000000000000000000000000000000000000000 Binary files a/documentation20/webdocs/assets/image-20190707124650780.png and /dev/null differ diff --git a/documentation20/webdocs/assets/image-20190707124818590.png 
b/documentation20/webdocs/assets/image-20190707124818590.png deleted file mode 100644 index dc1cb6325b2d4cd6f05c88b75b4d17ef85caa67f..0000000000000000000000000000000000000000 Binary files a/documentation20/webdocs/assets/image-20190707124818590.png and /dev/null differ diff --git a/documentation20/webdocs/markdowndocs/Connections with other Tools.md b/documentation20/webdocs/markdowndocs/Connections with other Tools.md deleted file mode 100644 index 8be05698497184aee2c41a60e32f39b636e2070e..0000000000000000000000000000000000000000 --- a/documentation20/webdocs/markdowndocs/Connections with other Tools.md +++ /dev/null @@ -1,167 +0,0 @@ -# Connect with other tools - -## Telegraf - -TDengine is easy to integrate with [Telegraf](https://www.influxdata.com/time-series-platform/telegraf/), an open-source server agent for collecting and sending metrics and events, without more development. - -### Install Telegraf - -At present, TDengine supports Telegraf newer than version 1.7.4. Users can go to the [download link] and choose the proper package to install on your system. - -### Configure Telegraf - -Telegraf is configured by changing items in the configuration file */etc/telegraf/telegraf.conf*. - - -In **output plugins** section,add _[[outputs.http]]_ iterm: - -- _url_: http://ip:6020/telegraf/udb, in which _ip_ is the IP address of any node in TDengine cluster. Port 6020 is the RESTful APT port used by TDengine. _udb_ is the name of the database to save data, which needs to create beforehand. -- _method_: "POST" -- _username_: username to login TDengine -- _password_: password to login TDengine -- _data_format_: "json" -- _json_timestamp_units_: "1ms" - -In **agent** part: - -- hostname: used to distinguish different machines. Need to be unique. -- metric_batch_size: 30,the maximum number of records allowed to write in Telegraf. The larger the value is, the less frequent requests are sent. For TDengine, the value should be less than 50. - -Please refer to the [Telegraf docs](https://docs.influxdata.com/telegraf/v1.11/) for more information. - -## Grafana - -[Grafana] is an open-source system for time-series data display. It is easy to integrate TDengine and Grafana to build a monitor system. Data saved in TDengine can be fetched and shown on the Grafana dashboard. - -### Install Grafana - -For now, TDengine only supports Grafana newer than version 5.2.4. Users can go to the [Grafana download page] for the proper package to download. - -### Configure Grafana - -TDengine Grafana plugin is in the _/usr/local/taos/connector/grafana_ directory. -Taking Centos 7.2 as an example, just copy TDengine directory to _/var/lib/grafana/plugins_ directory and restart Grafana. - -### Use Grafana - -Users can log in the Grafana server (username/password:admin/admin) through localhost:3000 to configure TDengine as the data source. As is shown in the picture below, TDengine as a data source option is shown in the box: - - -![img](../assets/clip_image001.png) - -When choosing TDengine as the data source, the Host in HTTP configuration should be configured as the IP address of any node of a TDengine cluster. The port should be set as 6020. For example, when TDengine and Grafana are on the same machine, it should be configured as _http://localhost:6020. - - -Besides, users also should set the username and password used to log into TDengine. Then click _Save&Test_ button to save. - -![img](../assets/clip_image001-2474914.png) - -Then, TDengine as a data source should show in the Grafana data source list. 
-
-![img](../assets/clip_image001-2474939.png)
-
-Then, users can create Dashboards in Grafana using TDengine as the data source:
-
-![img](../assets/clip_image001-2474961.png)
-
-Click the _Add Query_ button to add a query and input the SQL command you want to run in the _INPUT SQL_ text box. The SQL command should return a two-column, multi-row result, such as _SELECT count(*) FROM sys.cpu WHERE ts>=from and ts<to interval(interval)_, in which _from_, _to_ and _interval_ are TDengine inner variables representing the query time range and time interval.
-
-The _ALIAS BY_ field sets the query alias. Click _GENERATE SQL_ to send the command to TDengine:
-
-![img](../assets/clip_image001-2474987.png)
-
-Please refer to the [Grafana official document] for more information about Grafana.
-
-## Matlab
-
-Matlab can connect to and retrieve data from TDengine via the TDengine JDBC Driver.
-
-### Matlab and TDengine JDBC adaptation
-
-Several steps are required to adapt Matlab to TDengine. Taking Matlab 2017a on Windows 10 as an example:
-
-1. Copy the file _JDBCDriver-1.0.0-dist.jar_ in the TDengine package to the directory _${matlab_root}\MATLAB\R2017a\java\jar\toolbox_
-2. Copy the file _taos.lib_ in the TDengine package to _${matlab_root}\MATLAB\R2017a\lib\win64_
-3. Add the .jar package just copied to the Matlab classpath, by appending the line below to the end of the file _${matlab_root}\MATLAB\R2017a\toolbox\local\classpath.txt_:
-
-   `$matlabroot/java/jar/toolbox/JDBCDriver-1.0.0-dist.jar`
-
-4. Create a file called _javalibrarypath.txt_ in the directory _${user_home}\AppData\Roaming\MathWorks\MATLAB\R2017a\_, and add the _taos.dll_ path in the file. For example, if the file _taos.dll_ is in the directory _C:\Windows\System32_, then add the following line in *javalibrarypath.txt*:
-
-   `C:\Windows\System32`
-
-### TDengine operations in Matlab
-
-After correct configuration, open Matlab:
-
-- build a connection:
-
-  `conn = database('db', 'root', 'taosdata', 'com.taosdata.jdbc.TSDBDriver', 'jdbc:TSDB://127.0.0.1:0/')`
-
-- Query:
-
-  `sql0 = ['select * from tb']`
-
-  `data = select(conn, sql0);`
-
-- Insert a record:
-
-  `sql1 = ['insert into tb values (now, 1)']`
-
-  `exec(conn, sql1)`
-
-Please refer to the file _examples\Matlab\TDengineDemo.m_ for more information.
-
-## R
-
-Users can use the R language to access the TDengine server with the JDBC interface. At first, install the JDBC package in R:
-
-```R
-install.packages('RJDBC', repos='http://cran.us.r-project.org')
-```
-
-Then use the _library_ function to load the package:
-
-```R
-library('RJDBC')
-```
-
-Then load the TDengine JDBC driver:
-
-```R
-drv<-JDBC("com.taosdata.jdbc.TSDBDriver","JDBCDriver-1.0.0-dist.jar", identifier.quote="\"")
-```
-If it succeeds, no error message will be displayed. Then use the following command to try a database connection:
-
-```R
-conn<-dbConnect(drv,"jdbc:TSDB://192.168.0.1:0/?user=root&password=taosdata","root","taosdata")
-```
-
-Please replace the IP address in the command above with the correct one. If no error message is shown, then the connection is established successfully. TDengine supports the following functions in the _RJDBC_ package:
-
-- _dbWriteTable(conn, "test", iris, overwrite=FALSE, append=TRUE)_: write the data in a data frame _iris_ to the table _test_ in the TDengine server. Parameter _overwrite_ must be _FALSE_, _append_ must be _TRUE_, and the schema of the data frame _iris_ should be the same as the table _test_.
-- _dbGetQuery(conn, "select count(*) from test")_: run a query command
-- _dbSendUpdate(conn, "use db")_: run any non-query command
-- _dbReadTable(conn, "test")_: read all the data in table _test_
-- _dbDisconnect(conn)_: close a connection
-- _dbRemoveTable(conn, "test")_: remove table _test_
-
-The following functions are **not supported** currently:
-- _dbExistsTable(conn, "test")_: check whether table _test_ exists
-- _dbListTables(conn)_: list all tables in the connection
-
-
-[Telegraf]: www.taosdata.com
-[download link]: https://portal.influxdata.com/downloads
-[Telegraf document]: www.taosdata.com
-[Grafana]: https://grafana.com
-[Grafana download page]: https://grafana.com/grafana/download
-[Grafana official document]: https://grafana.com/docs/
-
diff --git a/documentation20/webdocs/markdowndocs/Connector.md b/documentation20/webdocs/markdowndocs/Connector.md
deleted file mode 100644
index e5ba6d518542fa60f71708482a9e9b65c12d09ad..0000000000000000000000000000000000000000
--- a/documentation20/webdocs/markdowndocs/Connector.md
+++ /dev/null
@@ -1,896 +0,0 @@
-# TDengine connectors
-
-TDengine provides many connectors for development, including C/C++, Java, Python, RESTful, Go, Node.js, etc.
-
-NOTE: All APIs which take a SQL string as a parameter, including but not limited to `taos_query`, `taos_query_a` and `taos_subscribe` in the C/C++ Connector and their counterparts in other connectors, can ONLY process one SQL statement at a time. If more than one SQL statement is provided, the behavior is undefined.
-
-## C/C++ API
-
-C/C++ APIs are similar to the MySQL APIs. Applications should include the TDengine header file _taos.h_ to use the C/C++ APIs by adding the following line in code:
-```C
-#include <taos.h>
-```
-Make sure the TDengine library _libtaos.so_ is installed and use the _-ltaos_ option to link against it when compiling. In most cases, if the return value of an API is an integer, it returns _0_ for success and another value as an error code for failure; if the return value is a pointer, then _NULL_ indicates failure.
-
-### Fundamental API
-
-Fundamental APIs prepare the runtime environment for other APIs, for example, creating a database connection.
-
-- `void taos_init()`
-
-  Initialize the runtime environment for the TDengine client. Calling this API is optional since it is called in _taos_connect_ by default.
-
-- `void taos_cleanup()`
-
-  Clean up the runtime environment; the client should call this API before exit.
-
-- `int taos_options(TSDB_OPTION option, const void * arg, ...)`
-
-  Set client options. The parameter _option_ supports values of _TSDB_OPTION_CONFIGDIR_ (configuration directory), _TSDB_OPTION_SHELL_ACTIVITY_TIMER_, _TSDB_OPTION_LOCALE_ (client locale) and _TSDB_OPTION_TIMEZONE_ (client timezone).
-
-- `char* taos_get_client_info()`
-
-  Retrieve the version information of the client.
-
-- `TAOS *taos_connect(const char *ip, const char *user, const char *pass, const char *db, int port)`
-
-  Open a connection to a TDengine server. The parameters are:
-
-  * ip: IP address of the server
-  * user: username
-  * pass: password
-  * db: database to use, **NULL** for no database to use after connection. Otherwise, the database should exist before connection or a connection error is reported.
-  * port: port number to connect
-
-  The handle returned by this API should be kept for future use.
-
-- `char *taos_get_server_info(TAOS *taos)`
-
-  Retrieve the version information of the server.
-
-- `int taos_select_db(TAOS *taos, const char *db)`
-
-  Set the default database to `db`.
- - -- `void taos_close(TAOS *taos)` - - Close a connection to a TDengine server by the handle returned by _taos_connect_` - - -### C/C++ sync API - -Sync APIs are those APIs waiting for responses from the server after sending a request. TDengine has the following sync APIs: - -- `TAOS_RES* taos_query(TAOS *taos, const char *sql)` - - The API used to run a SQL command. The command can be DQL, DML or DDL. The parameter _taos_ is the handle returned by _taos_connect_. Return value _NULL_ means failure. - - -- `int taos_result_precision(TAOS_RES *res)` - - Get the timestamp precision of the result set, return value _0_ means milli-second, _1_ mean micro-second and _2_ means nano-second. - - -- `TAOS_ROW taos_fetch_row(TAOS_RES *res)` - - Fetch a row of return results through _res_. - - -- `int taos_fetch_block(TAOS_RES *res, TAOS_ROW *rows)` - - Fetch multiple rows from the result set, return value is row count. - - -- `int taos_num_fields(TAOS_RES *res)` and `int taos_field_count(TAOS_RES* res)` - - These two APIs are identical, both return the number of fields in the return result. - - -- `int* taos_fetch_lengths(TAOS_RES *res)` - - Get the field lengths of the result set, return value is an array whose length is the field count. - - -- `int taos_affected_rows(TAOS_RES *res)` - - Get affected row count of the executed statement. - - -- `TAOS_FIELD *taos_fetch_fields(TAOS_RES *res)` - - Fetch the description of each field. The description includes the property of data type, field name, and bytes. The API should be used with _taos_num_fields_ to fetch a row of data. The structure of `TAOS_FIELD` is: - - ```c - typedef struct taosField { - char name[65]; // field name - uint8_t type; // data type - int16_t bytes; // length of the field in bytes - } TAOS_FIELD; - ``` - - -- `void taos_stop_query(TAOS_RES *res)` - - Stop the execution of a query. - - -- `void taos_free_result(TAOS_RES *res)` - - Free the resources used by a result set. Make sure to call this API after fetching results or memory leak would happen. - - -- `char *taos_errstr(TAOS_RES *res)` - - Return the reason of the last API call failure. The return value is a string. - - -- `int *taos_errno(TAOS_RES *res)` - - Return the error code of the last API call failure. The return value is an integer. - - -**Note**: The connection to a TDengine server is not multi-thread safe. So a connection can only be used by one thread. - - -### C/C++ async API - -In addition to sync APIs, TDengine also provides async APIs, which are more efficient. Async APIs are returned right away without waiting for a response from the server, allowing the application to continute with other tasks without blocking. So async APIs are more efficient, especially useful when in a poor network. - -All async APIs require callback functions. The callback functions have the format: -```C -void fp(void *param, TAOS_RES * res, TYPE param3) -``` -The first two parameters of the callback function are the same for all async APIs. The third parameter is different for different APIs. Generally, the first parameter is the handle provided to the API for action. The second parameter is a result handle. - -- `void taos_query_a(TAOS *taos, const char *sql, void (*fp)(void *param, TAOS_RES *, int code), void *param);` - - The async version of _taos_query_. - - * taos: the handle returned by _taos_connect_. - * sql: the SQL command to run. - * fp: user defined callback function. 
The third parameter of the callback function _code_ is _0_ (for success) or a negative number (for failure, call taos_errstr to get the error as a string). Applications mainly handle the second parameter, the returned result set. - * param: user provided parameter which is required by the callback function. - - -- `void taos_fetch_rows_a(TAOS_RES *res, void (*fp)(void *param, TAOS_RES *, int numOfRows), void *param);` - - The async API to fetch a batch of rows, which should only be used with a _taos_query_a_ call. - - * res: result handle returned by _taos_query_a_. - * fp: the callback function. _param_ is a user-defined structure to pass to _fp_. The parameter _numOfRows_ is the number of result rows in the current fetch cycle. In the callback function, applications should call _taos_fetch_row_ to get records from the result handle. After getting a batch of results, applications should continue to call _taos_fetch_rows_a_ API to handle the next batch, until the _numOfRows_ is _0_ (for no more data to fetch) or _-1_ (for failure). - - -- `void taos_fetch_row_a(TAOS_RES *res, void (*fp)(void *param, TAOS_RES *, TAOS_ROW row), void *param);` - - The async API to fetch a result row. - - * res: result handle. - * fp: the callback function. _param_ is a user-defined structure to pass to _fp_. The third parameter of the callback function is a single result row, which is different from that of _taos_fetch_rows_a_ API. With this API, it is not necessary to call _taos_fetch_row_ to retrieve each result row, which is handier than _taos_fetch_rows_a_ but less efficient. - - -Applications may apply operations on multiple tables. However, **it is important to make sure the operations on the same table are serialized**. That means after sending an insert request in a table to the server, no operations on the table are allowed before a response is received. - - -### C/C++ parameter binding API - -TDengine also provides parameter binding APIs, like MySQL, only question mark `?` can be used to represent a parameter in these APIs. - -- `TAOS_STMT* taos_stmt_init(TAOS *taos)` - - Create a TAOS_STMT to represent the prepared statement for other APIs. - -- `int taos_stmt_prepare(TAOS_STMT *stmt, const char *sql, unsigned long length)` - - Parse SQL statement _sql_ and bind result to _stmt_ , if _length_ larger than 0, its value is used to determine the length of _sql_, the API auto detects the actual length of _sql_ otherwise. - -- `int taos_stmt_bind_param(TAOS_STMT *stmt, TAOS_BIND *bind)` - - Bind values to parameters. _bind_ points to an array, the element count and sequence of the array must be identical as the parameters of the SQL statement. The usage of _TAOS_BIND_ is same as _MYSQL_BIND_ in MySQL, its definition is as below: - - ```c - typedef struct TAOS_BIND { - int buffer_type; - void * buffer; - unsigned long buffer_length; // not used in TDengine - unsigned long *length; - int * is_null; - int is_unsigned; // not used in TDengine - int * error; // not used in TDengine - } TAOS_BIND; - ``` - -- `int taos_stmt_add_batch(TAOS_STMT *stmt)` - - Add bound parameters to batch, client can call `taos_stmt_bind_param` again after calling this API. Note this API only support _insert_ / _import_ statements, it returns an error in other cases. - -- `int taos_stmt_execute(TAOS_STMT *stmt)` - - Execute the prepared statement. This API can only be called once for a statement at present. - -- `TAOS_RES* taos_stmt_use_result(TAOS_STMT *stmt)` - - Acquire the result set of an executed statement. 
The usage of the result is same as `taos_use_result`, `taos_free_result` must be called after one you are done with the result set to release resources. - -- `int taos_stmt_close(TAOS_STMT *stmt)` - - Close the statement, release all resources. - - -### C/C++ continuous query interface - -TDengine provides APIs for continuous query driven by time, which run queries periodically in the background. There are only two APIs: - - -- `TAOS_STREAM *taos_open_stream(TAOS *taos, const char *sqlstr, void (*fp)(void *param, TAOS_RES * res, TAOS_ROW row), int64_t stime, void *param, void (*callback)(void *));` - - The API is used to create a continuous query. - * _taos_: the connection handle returned by _taos_connect_. - * _sqlstr_: the SQL string to run. Only query commands are allowed. - * _fp_: the callback function to run after a query. TDengine passes query result `row`, query state `res` and user provided parameter `param` to this function. In this callback, `taos_num_fields` and `taos_fetch_fields` could be used to fetch field information. - * _param_: a parameter passed to _fp_ - * _stime_: the time of the stream starts in the form of epoch milliseconds. If _0_ is given, the start time is set as the current time. - * _callback_: a callback function to run when the continuous query stops automatically. - - The API is expected to return a handle for success. Otherwise, a NULL pointer is returned. - - -- `void taos_close_stream (TAOS_STREAM *tstr)` - - Close the continuous query by the handle returned by _taos_open_stream_. Make sure to call this API when the continuous query is not needed anymore. - - -### C/C++ subscription API - -For the time being, TDengine supports subscription on one or multiple tables. It is implemented through periodic pulling from a TDengine server. - -* `TAOS_SUB *taos_subscribe(TAOS* taos, int restart, const char* topic, const char *sql, TAOS_SUBSCRIBE_CALLBACK fp, void *param, int interval)` - - The API is used to start a subscription session, it returns the subscription object on success and `NULL` in case of failure, the parameters are: - * **taos**: The database connnection, which must be established already. - * **restart**: `Zero` to continue a subscription if it already exits, other value to start from the beginning. - * **topic**: The unique identifier of a subscription. - * **sql**: A sql statement for data query, it can only be a `select` statement, can only query for raw data, and can only query data in ascending order of the timestamp field. - * **fp**: A callback function to receive query result, only used in asynchronization mode and should be `NULL` in synchronization mode, please refer below for its prototype. - * **param**: User provided additional parameter for the callback function. - * **interval**: Pulling interval in millisecond. Under asynchronization mode, API will call the callback function `fp` in this interval, system performance will be impacted if this interval is too short. Under synchronization mode, if the duration between two call to `taos_consume` is less than this interval, the second call blocks until the duration exceed this interval. - -* `typedef void (*TAOS_SUBSCRIBE_CALLBACK)(TAOS_SUB* tsub, TAOS_RES *res, void* param, int code)` - - Prototype of the callback function, the parameters are: - * tsub: The subscription object. - * res: The query result. - * param: User provided additional parameter when calling `taos_subscribe`. - * code: Error code in case of failures. 
-
-* `TAOS_RES *taos_consume(TAOS_SUB *tsub)`
-
-  The API used to get new data from a TDengine server. It should be put in a loop. The parameter `tsub` is the handle returned by `taos_subscribe`. This API should only be called in synchronization mode. If the duration between two calls to `taos_consume` is less than the pulling interval, the second call blocks until the duration exceeds the interval. The API returns the new rows if new data arrives, or an empty row set otherwise; if there is an error, it returns `NULL`.
-
-* `void taos_unsubscribe(TAOS_SUB *tsub, int keepProgress)`
-
-  Stop a subscription session by the handle returned by `taos_subscribe`. If `keepProgress` is **not** zero, the subscription progress information is kept and can be reused in a later call to `taos_subscribe`; otherwise the information is removed.
-
-
-## Java Connector
-
-For Java developers, TDengine provides `taos-jdbcdriver` according to the JDBC (3.0) API. Users can find and download it through the [Sonatype Repository][1].
-
-Since the native language of TDengine is C, the necessary TDengine library should be checked before using taos-jdbcdriver:
-
-* libtaos.so (Linux)
-  After TDengine is installed successfully, the library `libtaos.so` will be automatically copied to `/usr/lib/`, which is the system's default search path.
-
-* taos.dll (Windows)
-  After the TDengine client is installed, the library `taos.dll` will be automatically copied to `C:/Windows/System32`, which is the system's default search path.
-
-> Note: Please make sure that the [TDengine Windows client][14] has been installed if developing on Windows. Although the TDengine client is installed together with the TDengine server by default, it can also be installed [alone][15].
-
-Since TDengine is a time-series database, there are still some differences from traditional databases in using the TDengine JDBC driver:
-* TDengine doesn't allow deleting or modifying a single record, and thus the JDBC driver has no such methods.
-* No support for transactions.
-* No support for union between tables.
-* No support for nested queries: there is at most one open ResultSet per Connection, so the JDBC driver will close the current ResultSet if it is still open when a new query begins.
-
-## Version list of TAOS-JDBCDriver and required TDengine and JDK
-
-| taos-jdbcdriver | TDengine | JDK |
-| --- | --- | --- |
-| 2.0.2 | 2.0.0.x or higher | 1.8.x |
-| 1.0.3 | 1.6.1.x or higher | 1.8.x |
-| 1.0.2 | 1.6.1.x or higher | 1.8.x |
-| 1.0.1 | 1.6.1.x or higher | 1.8.x |
-
-## DataType in TDengine and Java
-
-The data types in TDengine include timestamp, number, string and boolean, which are converted as follows in Java:
-
-| TDengine | Java |
-| --- | --- |
-| TIMESTAMP | java.sql.Timestamp |
-| INT | java.lang.Integer |
-| BIGINT | java.lang.Long |
-| FLOAT | java.lang.Float |
-| DOUBLE | java.lang.Double |
-| SMALLINT, TINYINT | java.lang.Short |
-| BOOL | java.lang.Boolean |
-| BINARY, NCHAR | java.lang.String |
-
-## How to get the TAOS-JDBC Driver
-
-### maven repository
-
-taos-jdbcdriver has been published to the [Sonatype Repository][1]:
-* [sonatype][8]
-* [mvnrepository][9]
-* [maven.aliyun][10]
-
-Use the following dependency in the pom.xml of maven projects:
-
-```xml
-<dependency>
-    <groupId>com.taosdata.jdbc</groupId>
-    <artifactId>taos-jdbcdriver</artifactId>
-    <version>2.0.2</version>
-</dependency>
-```
-
-### JAR file from the source code
-
-After downloading the [TDengine][3] source code, execute `mvn clean package` in the directory `src/connector/jdbc` and then the corresponding jar file is generated.
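-
-To use the locally built jar from another Maven project, one option is to install it into the local Maven repository first. A minimal sketch, assuming the build produced a 2.0.2 artifact (adjust the file name to whatever `mvn clean package` actually generated):
-
-```shell
-# install the freshly built connector jar into the local Maven repository
-mvn install:install-file -Dfile=target/taos-jdbcdriver-2.0.2-dist.jar \
-    -DgroupId=com.taosdata.jdbc -DartifactId=taos-jdbcdriver \
-    -Dversion=2.0.2 -Dpackaging=jar
-```
-
-After that, the same dependency block shown above will resolve against the local build.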
- -## Usage - -### get the connection - -```java -Class.forName("com.taosdata.jdbc.TSDBDriver"); -String jdbcUrl = "jdbc:TAOS://127.0.0.1:6030/log?user=root&password=taosdata"; -Connection conn = DriverManager.getConnection(jdbcUrl); -``` -> `6030` is the default port and `log` is the default database for system monitor. - -A normal JDBC URL looks as follows: -`jdbc:TAOS://{host_ip}:{port}/[database_name]?[user={user}|&password={password}|&charset={charset}|&cfgdir={config_dir}|&locale={locale}|&timezone={timezone}]` - -values in `{}` are necessary while values in `[]` are optional。Each option in the above URL denotes: - -* user:user name for login, defaultly root。 -* password:password for login,defaultly taosdata。 -* charset:charset for client,defaultly system charset -* cfgdir:log directory for client, defaultly _/etc/taos/_ on Linux and _C:/TDengine/cfg_ on Windows。 -* locale:language for client,defaultly system locale。 -* timezone:timezone for client,defaultly system timezone。 - -The options above can be configures (`ordered by priority`): -1. JDBC URL - - As explained above. -2. java.sql.DriverManager.getConnection(String jdbcUrl, Properties connProps) -```java -public Connection getConn() throws Exception{ - Class.forName("com.taosdata.jdbc.TSDBDriver"); - String jdbcUrl = "jdbc:TAOS://127.0.0.1:0/log?user=root&password=taosdata"; - Properties connProps = new Properties(); - connProps.setProperty(TSDBDriver.PROPERTY_KEY_USER, "root"); - connProps.setProperty(TSDBDriver.PROPERTY_KEY_PASSWORD, "taosdata"); - connProps.setProperty(TSDBDriver.PROPERTY_KEY_CONFIG_DIR, "/etc/taos"); - connProps.setProperty(TSDBDriver.PROPERTY_KEY_CHARSET, "UTF-8"); - connProps.setProperty(TSDBDriver.PROPERTY_KEY_LOCALE, "en_US.UTF-8"); - connProps.setProperty(TSDBDriver.PROPERTY_KEY_TIME_ZONE, "UTC-8"); - Connection conn = DriverManager.getConnection(jdbcUrl, connProps); - return conn; -} -``` - -3. Configuration file (taos.cfg) - - Default configuration file is _/var/lib/taos/taos.cfg_ On Linux and _C:\TDengine\cfg\taos.cfg_ on Windows -```properties -# client default username -# defaultUser root - -# client default password -# defaultPass taosdata - -# default system charset -# charset UTF-8 - -# system locale -# locale en_US.UTF-8 -``` -> More options can refer to [client configuration][13] - -### Create databases and tables - -```java -Statement stmt = conn.createStatement(); - -// create database -stmt.executeUpdate("create database if not exists db"); - -// use database -stmt.executeUpdate("use db"); - -// create table -stmt.executeUpdate("create table if not exists tb (ts timestamp, temperature int, humidity float)"); -``` -> Note: if no step like `use db`, the name of database must be added as prefix like _db.tb_ when operating on tables - -### Insert data - -```java -// insert data -int affectedRows = stmt.executeUpdate("insert into tb values(now, 23, 10.3) (now + 1s, 20, 9.3)"); - -System.out.println("insert " + affectedRows + " rows."); -``` -> _now_ is the server time. -> _now+1s_ is 1 second later than current server time. The time unit includes: _a_(millisecond), _s_(second), _m_(minute), _h_(hour), _d_(day), _w_(week), _n_(month), _y_(year). 
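-
-Timestamps can also be written explicitly instead of relying on _now_. A minimal sketch reusing the _tb_ table above (the literal value is illustrative):
-
-```java
-// insert one record with an explicit timestamp literal instead of now
-int rows = stmt.executeUpdate("insert into tb values('2019-07-15 10:02:00.000', 21, 9.1)");
-System.out.println("insert " + rows + " rows.");
-```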
- -### Query database - -```java -// query data -ResultSet resultSet = stmt.executeQuery("select * from tb"); - -Timestamp ts = null; -int temperature = 0; -float humidity = 0; -while(resultSet.next()){ - - ts = resultSet.getTimestamp(1); - temperature = resultSet.getInt(2); - humidity = resultSet.getFloat("humidity"); - - System.out.printf("%s, %d, %s\n", ts, temperature, humidity); -} -``` -> query is consistent with relational database. The subscript start with 1 when retrieving return results. It is recommended to use the column name to retrieve results. - -### Close all - -```java -resultSet.close(); -stmt.close(); -conn.close(); -``` -> `please make sure the connection is closed to avoid the error like connection leakage` - -## Using connection pool - -**HikariCP** - -* dependence in pom.xml: -```xml - - com.zaxxer - HikariCP - 3.4.1 - -``` - -* Examples: -```java - public static void main(String[] args) throws SQLException { - HikariConfig config = new HikariConfig(); - config.setJdbcUrl("jdbc:TAOS://127.0.0.1:6030/log"); - config.setUsername("root"); - config.setPassword("taosdata"); - - config.setMinimumIdle(3); //minimum number of idle connection - config.setMaximumPoolSize(10); //maximum number of connection in the pool - config.setConnectionTimeout(10000); //maximum wait milliseconds for get connection from pool - config.setIdleTimeout(60000); // max idle time for recycle idle connection - config.setConnectionTestQuery("describe log.dn"); //validation query - config.setValidationTimeout(3000); //validation query timeout - - HikariDataSource ds = new HikariDataSource(config); //create datasource - - Connection connection = ds.getConnection(); // get connection - Statement statement = connection.createStatement(); // get statement - - //query or insert - // ... - - connection.close(); // put back to conneciton pool -} -``` -> The close() method will not close the connection from HikariDataSource.getConnection(). Instead, the connection is put back to the connection pool. 
-> More instructions can refer to [User Guide][5] - -**Druid** - -* dependency in pom.xml: - -```xml - - com.alibaba - druid - 1.1.20 - -``` - -* Examples: -```java -public static void main(String[] args) throws Exception { - Properties properties = new Properties(); - properties.put("driverClassName","com.taosdata.jdbc.TSDBDriver"); - properties.put("url","jdbc:TAOS://127.0.0.1:6030/log"); - properties.put("username","root"); - properties.put("password","taosdata"); - - properties.put("maxActive","10"); //maximum number of connection in the pool - properties.put("initialSize","3");//initial number of connection - properties.put("maxWait","10000");//maximum wait milliseconds for get connection from pool - properties.put("minIdle","3");//minimum number of connection in the pool - - properties.put("timeBetweenEvictionRunsMillis","3000");// the interval milliseconds to test connection - - properties.put("minEvictableIdleTimeMillis","60000");//the minimum milliseconds to keep idle - properties.put("maxEvictableIdleTimeMillis","90000");//the maximum milliseconds to keep idle - - properties.put("validationQuery","describe log.dn"); //validation query - properties.put("testWhileIdle","true"); // test connection while idle - properties.put("testOnBorrow","false"); // don't need while testWhileIdle is true - properties.put("testOnReturn","false"); // don't need while testWhileIdle is true - - //create druid datasource - DataSource ds = DruidDataSourceFactory.createDataSource(properties); - Connection connection = ds.getConnection(); // get connection - Statement statement = connection.createStatement(); // get statement - - //query or insert - // ... - - connection.close(); // put back to conneciton pool -} -``` -> More instructions can refer to [User Guide][6] - -**Notice** -* TDengine `v1.6.4.1` provides a function `select server_status()` to check heartbeat. It is highly recommended to use this function for `Validation Query`. - -As follows,`1` will be returned if `select server_status()` is successfully executed。 -```shell -taos> select server_status(); -server_status()| -================ -1 | -Query OK, 1 row(s) in set (0.000141s) -``` - -## Python Connector - -### Install TDengine Python client - -Users can find python client packages in our source code directory _src/connector/python_. There are two directories corresponding two python versions. Please choose the correct package to install. Users can use _pip_ command to install: - -```cmd -pip install src/connector/python/python2/ -``` - -or - -``` -pip install src/connector/python/python3/ -``` - -If _pip_ command is not installed on the system, users can choose to install pip or just copy the _taos_ directory in the python client directory to the application directory to use. - -### Python client interfaces - -To use TDengine Python client, import TDengine module at first: - -```python -import taos -``` - -Users can get module information from Python help interface or refer to our [python code example](). We list the main classes and methods below: - -- _TDengineConnection_ class - - Run `help(taos.TDengineConnection)` in python terminal for details. - -- _TDengineCursor_ class - - Run `help(taos.TDengineCursor)` in python terminal for details. - -- connect method - - Open a connection. Run `help(taos.connect)` in python terminal for details. - -## RESTful Connector - -TDengine also provides RESTful API to satisfy developing on different platforms. 
Unlike other databases, the TDengine RESTful API applies operations to the database through SQL commands carried in the body of HTTP POST requests. All a user is required to provide is a URL.
-
-For the time being, the TDengine RESTful API uses a token generated from the username and password for identification. Safer identification methods will be provided in the future.
-
-
-### HTTP URL encoding
-
-To use the TDengine RESTful API, the URL should have the following format:
-```
-http://<ip>:<PORT>/rest/sql
-```
-- _ip_: IP address of any node in a TDengine cluster
-- _PORT_: TDengine HTTP service port. It is 6020 by default.
-
-For example, the URL _http://192.168.0.1:6020/rest/sql_ is used to send an HTTP request to a TDengine server with IP address 192.168.0.1.
-
-It is required to add a token in an HTTP request header for identification.
-
-```
-Authorization: Basic <TOKEN>
-```
-
-The HTTP request body contains the SQL command to run. If the SQL command contains a table name, the database it belongs to should also be provided, in the form of `<db_name>.<table_name>`. Otherwise, an error code is returned.
-
-For example, use the _curl_ command to send an HTTP request:
-
-```
-curl -H 'Authorization: Basic <TOKEN>' -d '<SQL>' <ip>:<PORT>/rest/sql
-```
-
-or use
-
-```
-curl -u username:password -d '<SQL>' <ip>:<PORT>/rest/sql
-```
-
-where `TOKEN` is the encrypted string of `{username}:{password}` using the Base64 algorithm, e.g. `root:taosdata` will be encoded as `cm9vdDp0YW9zZGF0YQ==`.
-
-### HTTP response
-
-The HTTP response is in JSON format as below:
-
-```
-{
-    "status": "succ",
-    "head": ["column1","column2", …],
-    "data": [
-        ["2017-12-12 23:44:25.730", 1],
-        ["2017-12-12 22:44:25.728", 4]
-    ],
-    "rows": 2
-}
-```
-Specifically,
-- _status_: the result of the operation, success or failure
-- _head_: description of the returned result columns
-- _data_: the returned data array. If no data is returned, only an _affected_rows_ field is listed
-- _rows_: the number of rows returned
-
-### Example
-
-- Use the _curl_ command to query all the data in table _t1_ of database _demo_:
-
-  `curl -H 'Authorization: Basic cm9vdDp0YW9zZGF0YQ==' -d 'select * from demo.t1' 192.168.0.1:6020/rest/sql`
-
-The return value is like:
-
-```
-{
-    "status": "succ",
-    "head": ["column1","column2","column3"],
-    "data": [
-        ["2017-12-12 23:44:25.730", 1, 2.3],
-        ["2017-12-12 22:44:25.728", 4, 5.6]
-    ],
-    "rows": 2
-}
-```
-
-- Use HTTP to create a database:
-
-  `curl -H 'Authorization: Basic cm9vdDp0YW9zZGF0YQ==' -d 'create database demo' 192.168.0.1:6020/rest/sql`
-
-  The return value should be:
-
-```
-{
-    "status": "succ",
-    "head": ["affected_rows"],
-    "data": [[1]],
-    "rows": 1
-}
-```
-
-## Go Connector
-
-TDengine provides a Go client package `taosSql`, which implements the interface of Go `database/sql/driver`. Users can access TDengine by importing the package in their program as follows; for detailed usage please refer to `https://github.com/taosdata/driver-go/blob/develop/taosSql/driver_test.go`
-
-```Go
-import (
-    "database/sql"
-    _ "github.com/taosdata/driver-go/taosSql"
-)
-```
-### API
-
-* `sql.Open(DRIVER_NAME string, dataSourceName string) *DB`
-
-  Open a DB. Generally DRIVER_NAME is used as a constant with the default value `taosSql`, and dataSourceName is a combined string with the format `user:password@/tcp(host:port)/dbname`. If a user wants to access TDengine from multiple goroutines concurrently, the better way is to create one sql.Open object per goroutine to access TDengine.
- - **Note**: When calling this api, only a few initial work are done, instead the validity check happened during executing `Query` or `Exec`, at this time the connection will be created, and system will check if `user、password、host、port` is valid. Additionaly the most of features are implemented in the taosSql dependency lib `libtaos`, from this view, sql.Open is lightweight. - -* `func (db *DB) Exec(query string, args ...interface{}) (Result, error)` - - Execute non-Query related SQLs, the execution result is stored with type of Result. - - -* `func (db *DB) Query(query string, args ...interface{}) (*Rows, error)` - - Execute Query related SQLs, the execution result is *Raw, the detailed usage can refer GO interface `database/sql/driver` - -## Node.js Connector - -TDengine also provides a node.js connector package that is installable through [npm](https://www.npmjs.com/). The package is also in our source code at *src/connector/nodejs/*. The following instructions are also available [here](https://github.com/taosdata/tdengine/tree/master/src/connector/nodejs) - -To get started, just type in the following to install the connector through [npm](https://www.npmjs.com/). - -```cmd -npm install td-connector -``` - -It is highly suggested you use npm. If you don't have it installed, you can also just copy the nodejs folder from *src/connector/nodejs/* into your node project folder. - -To interact with TDengine, we make use of the [node-gyp](https://github.com/nodejs/node-gyp) library. To install, you will need to install the following depending on platform (the following instructions are quoted from node-gyp) - -### On Unix - -- `python` (`v2.7` recommended, `v3.x.x` is **not** supported) -- `make` -- A proper C/C++ compiler toolchain, like [GCC](https://gcc.gnu.org) - -### On macOS - -- `python` (`v2.7` recommended, `v3.x.x` is **not** supported) (already installed on macOS) - -- Xcode - - - You also need to install the - - ``` - Command Line Tools - ``` - - via Xcode. You can find this under the menu - - ``` - Xcode -> Preferences -> Locations - ``` - - (or by running - - ``` - xcode-select --install - ``` - - in your Terminal) - - - This step will install `gcc` and the related toolchain containing `make` - -### On Windows - -#### Option 1 - -Install all the required tools and configurations using Microsoft's [windows-build-tools](https://github.com/felixrieseberg/windows-build-tools) using `npm install --global --production windows-build-tools` from an elevated PowerShell or CMD.exe (run as Administrator). - -#### Option 2 - -Install tools and configuration manually: - -- Install Visual C++ Build Environment: [Visual Studio Build Tools](https://visualstudio.microsoft.com/thank-you-downloading-visual-studio/?sku=BuildTools) (using "Visual C++ build tools" workload) or [Visual Studio 2017 Community](https://visualstudio.microsoft.com/pl/thank-you-downloading-visual-studio/?sku=Community) (using the "Desktop development with C++" workload) -- Install [Python 2.7](https://www.python.org/downloads/) (`v3.x.x` is not supported), and run `npm config set python python2.7` (or see below for further instructions on specifying the proper Python version and path.) -- Launch cmd, `npm config set msvs_version 2017` - -If the above steps didn't work for you, please visit [Microsoft's Node.js Guidelines for Windows](https://github.com/Microsoft/nodejs-guidelines/blob/master/windows-environment.md#compiling-native-addon-modules) for additional tips. 
- -To target native ARM64 Node.js on Windows 10 on ARM, add the components "Visual C++ compilers and libraries for ARM64" and "Visual C++ ATL for ARM64". - -### Usage - -The following is a short summary of the basic usage of the connector, the full api and documentation can be found [here](http://docs.taosdata.com/node) - -#### Connection - -To use the connector, first require the library ```td-connector```. Running the function ```taos.connect``` with the connection options passed in as an object will return a TDengine connection object. The required connection option is ```host```, other options if not set, will be the default values as shown below. - -A cursor also needs to be initialized in order to interact with TDengine from Node.js. - -```javascript -const taos = require('td-connector'); -var conn = taos.connect({host:"127.0.0.1", user:"root", password:"taosdata", config:"/etc/taos",port:0}) -var cursor = conn.cursor(); // Initializing a new cursor -``` - -To close a connection, run - -```javascript -conn.close(); -``` - -#### Queries - -We can now start executing simple queries through the ```cursor.query``` function, which returns a TaosQuery object. - -```javascript -var query = cursor.query('show databases;') -``` - -We can get the results of the queries through the ```query.execute()``` function, which returns a promise that resolves with a TaosResult object, which contains the raw data and additional functionalities such as pretty printing the results. - -```javascript -var promise = query.execute(); -promise.then(function(result) { - result.pretty(); //logs the results to the console as if you were in the taos shell -}); -``` - -You can also query by binding parameters to a query by filling in the question marks in a string as so. The query will automatically parse what was binded and convert it to the proper format for use with TDengine - -```javascript -var query = cursor.query('select * from meterinfo.meters where ts <= ? and areaid = ?;').bind(new Date(), 5); -query.execute().then(function(result) { - result.pretty(); -}) -``` - -The TaosQuery object can also be immediately executed upon creation by passing true as the second argument, returning a promise instead of a TaosQuery. - -```javascript -var promise = cursor.query('select * from meterinfo.meters where v1 = 30;', true) -promise.then(function(result) { - result.pretty(); -}) -``` -#### Async functionality - -Async queries can be performed using the same functions such as `cursor.execute`, `cursor.query`, but now with `_a` appended to them. - -Say you want to execute an two async query on two seperate tables, using `cursor.query_a`, you can do that and get a TaosQuery object, which upon executing with the `execute_a` function, returns a promise that resolves with a TaosResult object. 
- -```javascript -var promise1 = cursor.query_a('select count(*), avg(v1), avg(v2) from meter1;').execute_a() -var promise2 = cursor.query_a('select count(*), avg(v1), avg(v2) from meter2;').execute_a(); -promise1.then(function(result) { - result.pretty(); -}) -promise2.then(function(result) { - result.pretty(); -}) -``` - - -### Example - -An example of using the NodeJS connector to create a table with weather data and create and execute queries can be found [here](https://github.com/taosdata/TDengine/tree/master/tests/examples/nodejs/node-example.js) (The preferred method for using the connector) - -An example of using the NodeJS connector to achieve the same things but without all the object wrappers that wrap around the data returned to achieve higher functionality can be found [here](https://github.com/taosdata/TDengine/tree/master/tests/examples/nodejs/node-example-raw.js) - -[1]: https://search.maven.org/artifact/com.taosdata.jdbc/taos-jdbcdriver -[2]: https://mvnrepository.com/artifact/com.taosdata.jdbc/taos-jdbcdriver -[3]: https://github.com/taosdata/TDengine -[4]: https://www.taosdata.com/blog/2019/12/03/jdbcdriver%e6%89%be%e4%b8%8d%e5%88%b0%e5%8a%a8%e6%80%81%e9%93%be%e6%8e%a5%e5%ba%93/ -[5]: https://github.com/brettwooldridge/HikariCP -[6]: https://github.com/alibaba/druid -[7]: https://github.com/taosdata/TDengine/issues -[8]: https://search.maven.org/artifact/com.taosdata.jdbc/taos-jdbcdriver -[9]: https://mvnrepository.com/artifact/com.taosdata.jdbc/taos-jdbcdriver -[10]: https://maven.aliyun.com/mvn/search -[11]: https://github.com/taosdata/TDengine/tree/develop/tests/examples/JDBC/SpringJdbcTemplate -[12]: https://github.com/taosdata/TDengine/tree/develop/tests/examples/JDBC/springbootdemo -[13]: https://www.taosdata.com/cn/documentation20/administrator/#%E5%AE%A2%E6%88%B7%E7%AB%AF%E9%85%8D%E7%BD%AE -[14]: https://www.taosdata.com/cn/documentation20/connector/#Windows -[15]: https://www.taosdata.com/cn/getting-started/#%E5%BF%AB%E9%80%9F%E4%B8%8A%E6%89%8B \ No newline at end of file diff --git a/documentation20/webdocs/markdowndocs/Contributor_License_Agreement.md b/documentation20/webdocs/markdowndocs/Contributor_License_Agreement.md deleted file mode 100644 index 8c158da4c5958384064b9993de6643be86b94fee..0000000000000000000000000000000000000000 --- a/documentation20/webdocs/markdowndocs/Contributor_License_Agreement.md +++ /dev/null @@ -1,35 +0,0 @@ -# TaosData Contributor License Agreement - -This TaosData Contributor License Agreement (CLA) applies to any contribution you make to any TaosData projects. If you are representing your employing organization to sign this agreement, please warrant that you have the authority to grant the agreement. - -## Terms - -**"TaosData"**, **"we"**, **"our"** and **"us"** means TaosData, inc. - -**"You"** and **"your"** means you or the organization you are on behalf of to sign this agreement. - -**"Contribution"** means any original work you, or the organization you represent submit to TaosData for any project in any manner. - -## Copyright License - -All rights of your Contribution submitted to TaosData in any manner are granted to TaosData and recipients of software distributed by TaosData. 
You waive any rights that may affect our ownership of the copyright and grant to us a perpetual, worldwide, transferable, non-exclusive, no-charge, royalty-free, irrevocable, and sublicensable license to use, reproduce, prepare derivative works of, publicly display, publicly perform, sublicense, and distribute Contributions and any derivative work created based on a Contribution.
-
-## Patent License
-
-With respect to any patents you own, or that you can license without payment to any third party, you grant to us and to any recipient of software distributed by us a perpetual, worldwide, transferable, non-exclusive, no-charge, royalty-free, irrevocable patent license to make, have made, use, sell, offer to sell, import, and otherwise transfer the Contribution in whole or in part, alone or included in any product, under any patent you own, or license from a third party, that is necessarily infringed by the Contribution or by the combination of the Contribution with any Work.
-
-## Your Representations and Warranties
-
-You represent and warrant that:
-
-- the Contribution you submit is an original work and that you can legally grant the rights set out in this agreement.
-
-- the Contribution you submit, and the licenses you grant, do not and will not infringe the rights of any third party.
-
-- you are not aware of any pending or threatened claims, suits, actions, or charges pertaining to the Contributions. You also warrant to notify TaosData immediately if you become aware of any such actual or potential claims, suits, actions, allegations or charges.
-
-## Support
-
-You are not obligated to support your Contribution unless you volunteer to provide support. If you want, you may also provide support for a fee.
-
-**I agree and accept on behalf of myself and on behalf of my organization:**
\ No newline at end of file
diff --git a/documentation20/webdocs/markdowndocs/Documentation.md b/documentation20/webdocs/markdowndocs/Documentation.md
deleted file mode 100644
index bdafd40f7c76425a4f9734a2561b2b9a945c757f..0000000000000000000000000000000000000000
--- a/documentation20/webdocs/markdowndocs/Documentation.md
+++ /dev/null
@@ -1,87 +0,0 @@
-#Documentation
-
-TDengine is a highly efficient platform to store, query, and analyze time-series data. It works like a relational database, but you are strongly suggested to read through the following documentation before you experience it.
- -##Getting Started - -- Quick Start: download, install and experience TDengine in a few seconds -- TDengine Shell: command-line interface to access TDengine server -- Major Features: insert/query, aggregation, cache, pub/sub, continuous query - -## Data Model and Architecture - -- Data Model: relational database model, but one table for one device with static tags -- Architecture: Management Module, Data Module, Client Module -- Writing Process: records recieved are written to WAL, cache, then ack is sent back to client -- Data Storage: records are sharded in the time range, and stored column by column - -##TAOS SQL - -- Data Types: support timestamp, int, float, double, binary, nchar, bool, and other types -- Database Management: add, drop, check databases -- Table Management: add, drop, check, alter tables -- Inserting Records: insert one or more records into tables, historical records can be imported -- Data Query: query data with time range and filter conditions, support limit/offset -- SQL Functions: support aggregation, selector, transformation functions -- Downsampling: aggregate data in successive time windows, support interpolation - -##STable: Super Table - -- What is a Super Table: an innovated way to aggregate tables -- Create a STable: it is like creating a standard table, but with tags defined -- Create a Table via STable: use STable as the template, with tags specified -- Aggregate Tables via STable: group tables together by specifying the tags filter condition -- Create Table Automatically: create tables automatically with a STable as a template -- Management of STables: create/delete/alter super table just like standard tables -- Management of Tags: add/delete/alter tags on super tables or tables - -##Advanced Features - -- Continuous Query: query executed by TDengine periodically with a sliding window -- Publisher/Subscriber: subscribe to the newly arrived data like a typical messaging system -- Caching: the newly arrived data of each device/table will always be cached - -##Connector - -- C/C++ Connector: primary method to connect to the server through libtaos client library -- Java Connector: driver for connecting to the server from Java applications using the JDBC API -- Python Connector: driver for connecting to the server from Python applications -- RESTful Connector: a simple way to interact with TDengine via HTTP -- Go Connector: driver for connecting to the server from Go applications -- Node.js Connector: driver for connecting to the server from node applications - -##Connections with Other Tools - -- Telegraf: pass the collected DevOps metrics to TDengine -- Grafana: query the data saved in TDengine and visualize them -- Matlab: access TDengine server from Matlab via JDBC -- R: access TDengine server from R via JDBC - -##Administrator - -- Directory and Files: files and directories related with TDengine -- Configuration on Server: customize IP port, cache size, file block size and other settings -- Configuration on Client: customize locale, default user and others -- User Management: add/delete users, change passwords -- Import Data: import data into TDengine from either script or CSV file -- Export Data: export data either from TDengine shell or from tool taosdump -- Management of Connections, Streams, Queries: check or kill the connections, queries -- System Monitor: collect the system metric, and log important operations - -##More on System Architecture - -- Storage Design: column-based storage with optimization on time-series data -- Query Design: an 
efficient way to query time-series data -- Technical blogs to delve into the inside of TDengine - -## More on IoT Big Data - -- [Characteristics of IoT Big Data](https://www.taosdata.com/blog/2019/07/09/characteristics-of-iot-big-data/) -- [Why don’t General Big Data Platforms Fit IoT Scenarios?](https://www.taosdata.com/blog/2019/07/09/why-does-the-general-big-data-platform-not-fit-iot-data-processing/) -- [Why TDengine is the Best Choice for IoT Big Data Processing?](https://www.taosdata.com/blog/2019/07/09/why-tdengine-is-the-best-choice-for-iot-big-data-processing/) - -##Tutorials & FAQ - -- FAQ: a list of frequently asked questions and answers -- Use cases: a few typical cases to explain how to use TDengine in IoT platform - diff --git a/documentation20/webdocs/markdowndocs/Getting Started.md b/documentation20/webdocs/markdowndocs/Getting Started.md deleted file mode 100644 index 4d34cb49f4a84ac6c9d63e47bc8230c150b9013e..0000000000000000000000000000000000000000 --- a/documentation20/webdocs/markdowndocs/Getting Started.md +++ /dev/null @@ -1,151 +0,0 @@ -#Getting Started - -## Quick Start - -At the moment, TDengine only runs on Linux. You can set up and install it either from the source code or the packages. It takes only a few seconds from download to run it successfully. - -### Install from Source - -Please visit our [github page](https://github.com/taosdata/TDengine) for instructions on installation from the source code. - -### Install from Package - -Three different packages are provided, please pick up the one you like. - -For the time being, TDengine only supports installation on Linux systems using [`systemd`](https://en.wikipedia.org/wiki/Systemd) as the service manager. To check if your system has *systemd* package, use the _which systemctl_ command. - -```cmd -which systemctl -``` - -If the `systemd` package is not found, please [install from source code](#Install-from-Source). - -### Running TDengine - -After installation, start the TDengine service by the `systemctl` command. - -```cmd -systemctl start taosd -``` - -Then check if the server is working now. -```cmd -systemctl status taosd -``` - -If the service is running successfully, you can play around through TDengine shell `taos`, the command line interface tool located in directory /usr/local/bin/taos - -**Note: The _systemctl_ command needs the root privilege. Use _sudo_ if you are not the _root_ user.** - -##TDengine Shell -To launch TDengine shell, the command line interface, in a Linux terminal, type: - -```cmd -taos -``` - -The welcome message is printed if the shell connects to TDengine server successfully, otherwise, an error message will be printed (refer to our [FAQ](../faq) page for troubleshooting the connection error). The TDengine shell prompt is: - -```cmd -taos> -``` - -In the TDengine shell, you can create databases, create tables and insert/query data with SQL. Each query command ends with a semicolon. It works like MySQL, for example: - -```mysql -create database db; -use db; -create table t (ts timestamp, cdata int); -insert into t values ('2019-07-15 10:00:00', 10); -insert into t values ('2019-07-15 10:01:05', 20); -select * from t; - ts | speed | -=================================== - 19-07-15 10:00:00.000| 10| - 19-07-15 10:01:05.000| 20| -Query OK, 2 row(s) in set (0.001700s) -``` - -Besides the SQL commands, the system administrator can check system status, add or delete accounts, and manage the servers. 
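-
-For example, account management is also done in the shell. A short sketch (the user name and password are illustrative, and the commands require sufficient privilege):
-
-```mysql
-show users;
-create user tester pass 'test123';
-drop user tester;
-```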
- -###Shell Command Line Parameters - -You can run `taos` command with command line options to fit your needs. Some frequently used options are listed below: - -- -c, --config-dir: set the configuration directory. It is _/etc/taos_ by default -- -h, --host: set the IP address of the server it will connect to, Default is localhost -- -s, --commands: set the command to run without entering the shell -- -u, -- user: user name to connect to server. Default is root -- -p, --password: password. Default is 'taosdata' -- -?, --help: get a full list of supported options - -Examples: - -```cmd -taos -h 192.168.0.1 -s "use db; show tables;" -``` - -###Run Batch Commands - -Inside TDengine shell, you can run batch commands in a file with *source* command. - -``` -taos> source ; -``` - -### Tips - -- Use up/down arrow key to check the command history -- To change the default password, use "`alter user`" command -- ctrl+c to interrupt any queries -- To clean the cached schema of tables or STables, execute command `RESET QUERY CACHE` - -## Major Features - -The core functionality of TDengine is the time-series database. To reduce the development and management complexity, and to improve the system efficiency further, TDengine also provides caching, pub/sub messaging system, and stream computing functionalities. It provides a full stack for IoT big data platform. The detailed features are listed below: - -- SQL like query language used to insert or explore data - -- C/C++, Java(JDBC), Python, Go, RESTful, and Node.JS interfaces for development - -- Ad hoc queries/analysis via Python/R/Matlab or TDengine shell - -- Continuous queries to support sliding-window based stream computing - -- Super table to aggregate multiple time-streams efficiently with flexibility - -- Aggregation over a time window on one or multiple time-streams - -- Built-in messaging system to support publisher/subscriber model - -- Built-in cache for each time stream to make latest data available as fast as light speed - -- Transparent handling of historical data and real-time data - -- Integrating with Telegraf, Grafana and other tools seamlessly - -- A set of tools or configuration to manage TDengine - - -For enterprise edition, TDengine provides more advanced features below: - -- Linear scalability to deliver higher capacity/throughput - -- High availability to guarantee the carrier-grade service - -- Built-in replication between nodes which may span multiple geographical sites - -- Multi-tier storage to make historical data management simpler and cost-effective - -- Web-based management tools and other tools to make maintenance simpler - -TDengine is specially designed and optimized for time-series data processing in IoT, connected cars, Industrial IoT, IT infrastructure and application monitoring, and other scenarios. Compared with other solutions, it is 10x faster on insert/query speed. With a single-core machine, over 20K requestes can be processed, millions data points can be ingested, and over 10 million data points can be retrieved in a second. Via column-based storage and tuned compression algorithm for different data types, less than 1/10 storage space is required. - -## Explore More on TDengine - -Please read through the whole documentation to learn more about TDengine. 
- diff --git a/documentation20/webdocs/markdowndocs/More on System Architecture-ch.md b/documentation20/webdocs/markdowndocs/More on System Architecture-ch.md deleted file mode 100644 index 44d572268de04662c190a6a5975c784b38aad117..0000000000000000000000000000000000000000 --- a/documentation20/webdocs/markdowndocs/More on System Architecture-ch.md +++ /dev/null @@ -1,248 +0,0 @@ -# TDengine的技术设计 - -## 存储设计 - -TDengine的数据存储主要包含**元数据的存储**和**写入数据的存储**。以下章节详细介绍了TDengine各种数据的存储结构。 - -### 元数据的存储 - -TDengine中的元数据信息包括TDengine中的数据库,表,超级表等信息。元数据信息默认存放在 _/var/lib/taos/mgmt/_ 文件夹下。该文件夹的目录结构如下所示: -``` -/var/lib/taos/ - +--mgmt/ - +--db.db - +--meters.db - +--user.db - +--vgroups.db -``` -元数据在文件中按顺序排列。文件中的每条记录代表TDengine中的一个元数据机构(数据库、表等)。元数据文件只进行追加操作,即便是元数据的删除,也只是在数据文件中追加一条删除的记录。 - -### 写入数据的存储 - -TDengine中写入的数据在硬盘上是按时间维度进行分片的。同一个vnode中的表在同一时间范围内的数据都存放在同一文件组中,如下图中的v0f1804*文件。这一数据分片方式可以大大简化数据在时间维度的查询,提高查询速度。在默认配置下,硬盘上的每个文件存放10天数据。用户可根据需要调整数据库的 _daysPerFile_ 配置项进行配置。 数据在文件中是按块存储的。每个数据块只包含一张表的数据,且数据是按照时间主键递增排列的。数据在数据块中按列存储,这样使得同类型的数据存放在一起,可以大大提高压缩的比例,节省存储空间。TDengine对不同类型的数据采用了不同的压缩算法进行压缩,以达到最优的压缩结果。TDengine使用的压缩算法包括simple8B、delta-of-delta、RLE以及LZ4等。 - -TDengine的数据文件默认存放在 */var/lib/taos/data/* 下。而 */var/lib/taos/tsdb/* 文件夹下存放了vnode的信息、vnode中表的信息以及数据文件的链接等。其完整目录结构如下所示: -``` -/var/lib/taos/ - +--tsdb/ - | +--vnode0 - | +--meterObj.v0 - | +--db/ - | +--v0f1804.head->/var/lib/taos/data/vnode0/v0f1804.head1 - | +--v0f1804.data->/var/lib/taos/data/vnode0/v0f1804.data - | +--v0f1804.last->/var/lib/taos/data/vnode0/v0f1804.last1 - | +--v0f1805.head->/var/lib/taos/data/vnode0/v0f1805.head1 - | +--v0f1805.data->/var/lib/taos/data/vnode0/v0f1805.data - | +--v0f1805.last->/var/lib/taos/data/vnode0/v0f1805.last1 - | : - +--data/ - +--vnode0/ - +--v0f1804.head1 - +--v0f1804.data - +--v0f1804.last1 - +--v0f1805.head1 - +--v0f1805.data - +--v0f1805.last1 - : -``` - -#### meterObj文件 -每个vnode中只存在一个 _meterObj_ 文件。该文件中存储了vnode的基本信息(创建时间,配置信息,vnode的统计信息等)以及该vnode中表的信息。其结构如下所示: -``` -<文件开始> -[文件头] -[表记录1偏移量和长度] -[表记录2偏移量和长度] -... -[表记录N偏移量和长度] -[表记录1] -[表记录2] -... -[表记录N] -[表记录] -<文件结尾> -``` -其中,文件头大小为512字节,主要存放vnode的基本信息。每条表记录代表属于该vnode中的一张表在硬盘上的表示。 - -#### head文件 -head文件中存放了其对应的data文件中数据块的索引信息。该文件组织形式如下: -``` -<文件开始> -[文件头] -[表1偏移量] -[表2偏移量] -... -[表N偏移量] -[表1数据索引] -[表2数据索引] -... -[表N数据索引] -<文件结尾> -``` -文件开头的偏移量列表表示对应表的数据索引块的开始位置在文件中的偏移量。每张表的数据索引信息在head文件中都是连续存放的。这也使得TDengine在读取单表数据时,可以将该表所有的数据块索引一次性读入内存,大大提高读取速度。表的数据索引块组织如下: -``` -[索引块信息] -[数据块1索引] -[数据块2索引] -... -[数据块N索引] -``` -其中,索引块信息中记录了数据块的个数等描述信息。每个数据块索引对应一个在data文件或last文件中的一个单独的数据块。索引信息中记录了数据块存放的文件、数据块起始位置的偏移量、数据块中数据时间主键的范围等。索引块中的数据块索引是按照时间范围顺序排放的,这也就是说,索引块M对应的数据块中的数据时间范围都大于索引块M-1的。这种预先排序的存储方式使得在TDengine在进行按照时间戳进行查询时可以使用折半查找算法,大大提高查询速度。 - -#### data文件 -data文件中存放了真实的数据块。该文件只进行追加操作。其文件组织形式如下: -``` -<文件开始> -[文件头] -[数据块1] -[数据块2] -... -[数据块N] -<文件结尾> -``` -每个数据块只属于vnode中的一张表,且数据块中的数据按照时间主键排列。数据块中的数据按列组织排放,使得同一类型的数据排放在一起,方便压缩和读取。每个数据块的组织形式如下所示: -``` -[列1信息] -[列2信息] -... -[列N信息] -[列1数据] -[列2数据] -... 
-[列N数据] -``` -列信息中包含该列的类型,列的压缩算法,列数据在文件中的偏移量以及长度等。除此之外,列信息中也包含该内存块中该列数据的预计算结果,从而在过滤查询时根据预计算结果判定是否读取数据块,大大提高读取速度。 - -#### last文件 -为了防止数据块的碎片化,提高查询速度和压缩率,TDengine引入了last文件。当要落盘的数据块中的数据条数低于某个阈值时,TDengine会先将该数据块写入到last文件中进行暂时存储。当有新的数据需要落盘时,last文件中的数据会被读取出来与新数据组成新的数据块写入到data文件中。last文件的组织形式与data文件类似。 - -### TDengine数据存储小结 -TDengine通过其创新的架构和存储结构设计,有效提高了计算机资源的使用率。一方面,TDengine的虚拟化使得TDengine的水平扩展及备份非常容易。另一方面,TDengine将表中数据按时间主键排序存储且其列式存储的组织形式都使TDengine在写入、查询以及压缩方面拥有非常大的优势。 - - -## 查询处理 - -### 概述 - -TDengine提供了多种多样针对表和超级表的查询处理功能,除了常规的聚合查询之外,还提供针对时序数据的窗口查询、统计聚合等功能。TDengine的查询处理需要客户端、管理节点、数据节点协同完成。 各组件包含的与查询处理相关的功能和模块如下: - -客户端(Client App)。客户端包含TAOS SQL的解析(SQL Parser)和查询请求执行器(Query Executor),第二阶段聚合器(Result Merger),连续查询管理器(Continuous Query Manager)等主要功能模块构成。SQL解析器负责对SQL语句进行解析校验,并转化为抽象语法树,查询执行器负责将抽象语法树转化查询执行逻辑,并根据SQL语句查询条件,将其转换为针对管理节点元数据查询和针对数据节点的数据查询两级查询处理。由于TAOS SQL当前不提供复杂的嵌套查询和pipeline查询处理机制,所以不再需要查询计划优化、逻辑查询计划到物理查询计划转换等过程。第二阶段聚合器负责将各数据节点查询返回的独立结果进行二阶段聚合生成最后的结果。连续查询管理器则负责针对用户建立的连续查询进行管理,负责定时拉起查询请求并按需将结果写回TDengine或返回给客户应用。此外,客户端还负责查询失败后重试、取消查询请求、以及维持连接心跳、向管理节点上报查询状态等工作。 - -管理节点(Management Node)。管理节点保存了整个集群系统的全部数据的元数据信息,向客户端节点提供查询所需的数据的元数据,并根据集群的负载情况切分查询请求。通过超级表包含了通过该超级表创建的所有表的信息,因此查询处理器(Query Executor)负责针对标签(TAG)的查询处理,并将满足标签查询请求的表信息返回给客户端。此外,管理节点还维护集群的查询状态(Query Status Manager)维护,查询状态管理中在内存中临时保存有当前正在执行的全部查询,当客户端使用 *show queries* 命令的时候,将当前系统正在运行的查询信息返回客户端。 - -数据节点(Data Node)。数据节点保存了数据库中全部数据内容,并通过查询执行器、查询处理调度器、查询任务队列(Query Task Queue)进行查询处理的调度执行,从客户端接收到的查询处理请求都统一放置到处理队列中,查询执行器从队列中获得查询请求,并负责执行。通过查询优化器(Query Optimizer)对于查询进行基本的优化处理,以及通过数据节点的查询执行器(Query Executor)扫描符合条件的数据单元并返回计算结果。等接收客户端发出的查询请求,执行查询处理,并将结果返回。同时数据节点还需要响应来自管理节点的管理信息和命令,例如 *kill query* 命令以后,需要即刻停止执行的查询任务。 - -
-图 1. 系统查询处理架构图(只包含查询相关组件)
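-
-一个假设的示例:可以使用上文提到的 *show queries* 查看正在执行的查询,并根据其返回的 ID 用 *kill query* 终止某个查询(下面的 ID 仅为示意值,具体语法以实际版本为准):
-
-```sql
--- 查看当前正在执行的查询
-SHOW QUERIES;
--- 按 SHOW QUERIES 返回的 ID 终止查询
-KILL QUERY "1:3";
-```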
- -### 普通查询处理 - -客户端、管理节点、数据节点协同完成TDengine的查询处理全流程。我们以一个具体的SQL查询为例,说明TDengine的查询处理流程。SQL语句向超级表*FOO_SUPER_TABLE*查询获取时间范围在2019年1月12日整天,标签TAG_LOC是'beijing'的表所包含的所有记录总数,SQL语句如下: - -```sql -SELECT COUNT(*) -FROM FOO_SUPER_TABLE -WHERE TAG_LOC = 'beijing' AND TS >= '2019-01-12 00:00:00' AND TS < '2019-01-13 00:00:00' -``` - -首先,客户端调用TAOS SQL解析器对SQL语句进行解析及合法性检查,然后生成语法树,并从中提取查询的对象 — 超级表 *FOO_SUPER_TABLE* ,然后解析器向管理节点(Management Node)请求其相应的元数据信息,并将过滤信息(TAG_LOC='beijing')同时发送到管理节点。 - -管理节点接收元数据获取的请求,首先找到超级表 *FOO_SUPER_TABLE* 基础信息,然后应用查询条件来过滤通过该超级表创建的全部表,最后满足查询条件(TAG_LOC='beijing'),即 *TAG_LOC* 标签列是 'beijing' 的的通过其查询执行器将满足查询要求的对象(表或超级表)的元数据信息返回给客户端。 - -客户端获得了 *FOO_SUPER_TABLE* 的元数据信息后,查询执行器根据元数据中的数据分布,分别向保存有相应数据的节点发起查询请求,此时时间戳范围过滤条件(TS >= '2019-01-12 00:00:00' AND TS < '2019-01-13 00:00:00')需要同时发送给全部的数据节点。 - -数据节点接收到发自客户端的查询,转化为内部结构并进行优化以后将其放入任务执行队列,等待查询执行器执行。当查询结果获得以后,将查询结果返回客户端。数据节点执行查询的过程均相互独立,完全只依赖于自身的数据和内容进行计算。 - -当所有查询涉及的数据节点返回结果后,客户端将每个数据节点查询的结果集再次进行聚合(针对本案例,即将所有结果再次进行累加),累加的结果即为最后的查询结果。第二阶段聚合并不是所有的查询都需要。例如,针对数据的列选取操作,实际上是不需要第二阶段聚合。 - -### REST查询处理 - -在 C/C++ 、Python接口、 JDBC 接口之外,TDengine 还提供基于 HTTP 协议的 REST 接口。不同于使用应用客户端开发程序进行的开发。当用户使用 REST 接口的时候,所有的查询处理过程都是在服务器端来完成,用户的应用服务不会参与数据库的计算过程,查询处理完成后结果通过 HTTP的 JSON 格式返回给用户。 - -
-图 2. REST查询架构
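-
-一个假设的示例:通过 REST 接口提交的请求体就是一条普通的 SQL 语句,例如下面这条(表名仅为示意);服务端执行完成后,结果以 JSON 字符串的形式通过 HTTP 响应返回:
-
-```sql
-SELECT COUNT(*) FROM foo_table WHERE ts >= now - 1h;
-```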
- -当用户使用基于HTTP的REST查询接口,HTTP的请求首先与位于数据节点的HTTP连接器( Connector),建立连接,然后通过REST的签名机制,使用Token来确保请求的可靠性。对于数据节点,HTTP连接器接收到请求后,调用内嵌的客户端程序发起查询请求,内嵌客户端将解析通过HTTP连接器传递过来的SQL语句,解析该SQL语句并按需向管理节点请求元数据信息,然后向本机或集群中其他节点发送查询请求,最后按需聚合计算结果。HTTP连接器接收到请求SQL以后,后续的流程处理与采用应用客户端方式的查询处理完全一致。最后,还需要将查询的结果转换为JSON格式字符串,并通过HTTP 响应返回给客户端。 - -可以看到,在处理HTTP流程的整个过程中,用户应用不再参与到查询处理的过程中,只负责通过HTTP协议发送SQL请求并接收JSON格式的结果。同时还需要注意的是,每个数据节点均内嵌了一个HTTP连接器和客户端程序,因此请求集群中任何一个数据节点,该数据节点均能够通过HTTP协议返回用户的查询结果。 - -### 技术特征 - -由于TDengine采用数据和标签分离存储的模式,能够极大地降低标签数据存储的冗余度。标签数据直接关联到每个表,并采用全内存的结构进行管理和维护标签数据,全内存的结构提供快速的查询处理,千万级别规模的标签数据查询可以在毫秒级别返回。首先针对标签数据的过滤可以有效地降低第二阶段的查询涉及的数据规模。为有效地提升查询处理的性能,针对物联网数据的不可更改的特点,TDengine采用在每个保存的数据块上,都记录下该数据块中数据的最大值、最小值、和等统计数据。如果查询处理涉及整个数据块的全部数据,则直接使用预计算结果,不再读取数据块的内容。由于预计算模块的大小远小于磁盘上存储的具体数据的大小,对于磁盘IO为瓶颈的查询处理,使用预计算结果可以极大地减小读取IO,并加速查询处理的流程。 - -由于TDengine采用按列存储数据。当从磁盘中读取数据块进行计算的时候,按照查询列信息读取该列数据,并不需要读取其他不相关的数据,可以最小化读取数据。此外,由于采用列存储结构,数据节点针对数据的扫描采用该列数据块进行,可以充分利用CPU L2高速缓存,极大地加速数据扫描的速度。此外,对于某些查询,并不会等全部查询结果生成后再返回结果。例如,列选取查询,当第一批查询结果获得以后,数据节点直接将其返回客户端。同时,在查询处理过程中,系统在数据节点接收到查询请求以后马上返回客户端查询确认信息,并同时拉起查询处理过程,并等待查询执行完成后才返回给用户查询有响应。 - -## TDengine集群设计 - -### 1:集群与主要逻辑单元 - -TDengine是基于硬件、软件系统不可靠、一定会有故障的假设进行设计的,是基于任何单台计算机都无足够能力处理海量数据的假设进行设计的。因此TDengine从研发的第一天起,就按照分布式高可靠架构进行设计,是完全去中心化的,是水平扩展的,这样任何单台或多台服务器宕机或软件错误都不影响系统的服务。通过节点虚拟化并辅以自动化负载均衡技术,TDengine能最大限度地利用异构集群中的计算和存储资源。而且只要数据副本数大于一,无论是硬软件的升级、还是IDC的迁移等都无需停止集群的服务,极大地保证系统的正常运行,并且降低了系统管理员和运维人员的工作量。 - -下面的示例图上有八个物理节点,每个物理节点被逻辑的划分为多个虚拟节点。下面对系统的基本概念进行介绍。 - - - -![assets/nodes.png](../assets/nodes.png) - -**物理节点(dnode)**:集群中的一物理服务器或云平台上的一虚拟机。为安全以及通讯效率,一个物理节点可配置两张网卡,或两个IP地址。其中一张网卡用于集群内部通讯,其IP地址为**privateIp**, 另外一张网卡用于与集群外部应用的通讯,其IP地址为**publicIp**。在一些云平台(如阿里云),对外的IP地址是映射过来的,因此publicIp还有一个对应的内部IP地址**internalIp**(与privateIp不同)。对于只有一个IP地址的物理节点,publicIp, privateIp以及internalIp都是同一个地址,没有任何区别。一个dnode上有而且只有一个taosd实例运行。 - -**虚拟数据节点(vnode)**:在物理节点之上的可独立运行的基础逻辑单元,时序数据写入、存储、查询等操作逻辑都在虚拟节点中进行(图中V),采集的时序数据就存储在vnode上。一个vnode包含固定数量的表。当创建一张新表时,系统会检查是否需要创建新的vnode。一个物理节点上能创建的vnode的数量取决于物理节点的硬件资源。一个vnode只属于一个DB,但一个DB可以有多个vnode。 - -**虚拟数据节点组(vgroup)**: 位于不同物理节点的vnode可以组成一个虚拟数据节点组vnode group(如上图dnode0中的V0, dnode1中的V1, dnode6中的V2属于同一个虚拟节点组)。归属于同一个vgroup的虚拟节点采取master/slave的方式进行管理。写只能在master上进行,但采用asynchronous的方式将数据同步到slave,这样确保了一份数据在多个物理节点上有拷贝。如果master节点宕机,其他节点监测到后,将重新选举vgroup里的master, 新的master能继续处理数据请求,从而保证系统运行的可靠性。一个vgroup里虚拟节点个数就是数据的副本数。如果一个DB的副本数为N,系统必须有至少N个物理节点。副本数在创建DB时通过参数replica可以指定,缺省为1。使用TDengine, 数据的安全依靠多副本解决,因此不再需要昂贵的磁盘阵列等存储设备。 - -**虚拟管理节点(mnode)**:负责所有节点运行状态的监控和维护,以及节点之间的负载均衡(图中M)。同时,虚拟管理节点也负责元数据(包括用户、数据库、表、静态标签等)的存储和管理,因此也称为Meta Node。TDengine集群中可配置多个(最多不超过5个) mnode,它们自动构建成为一个管理节点集群(图中M0, M1, M2)。mnode间采用master/slave的机制进行管理,而且采取强一致方式进行数据同步。mnode集群的创建由系统自动完成,无需人工干预。每个dnode上至多有一个mnode,而且每个dnode都知道整个集群中所有mnode的IP地址。 - -**taosc**:一个软件模块,是TDengine给应用提供的驱动程序(driver),内嵌于JDBC、ODBC driver中,或者C语言连接库里。应用都是通过taosc而不是直接来与整个集群进行交互的。这个模块负责获取并缓存元数据;将插入、查询等请求转发到正确的虚拟节点;在把结果返回给应用时,还需要负责最后一级的聚合、排序、过滤等操作。对于JDBC, ODBC, C/C++接口而言,这个模块是在应用所处的计算机上运行,但消耗的资源很小。为支持全分布式的REST接口,taosc在TDengine集群的每个dnode上都有一运行实例。 - -**对外服务地址**:TDengine集群可以容纳单台、多台甚至几千台物理节点。应用只需要向集群中任何一个物理节点的publicIp发起连接即可。启动CLI应用taos时,选项-h需要提供的就是publicIp。 - -**master/secondIp**:每一个dnode都需要配置一个masterIp。dnode启动后,将对配置的masterIp发起加入集群的连接请求。masterIp是已经创建的集群中的任何一个节点的privateIp,对于集群中的第一个节点,就是它自己的privateIp。为保证连接成功,每个dnode还可配置secondIp, 该IP地址也是已创建的集群中的任何一个节点的privateIp。如果一个节点连接masterIp失败,它将试图连接secondIp。 - -dnode启动后,会获知集群的mnode IP列表,并且定时向mnode发送状态信息。 - -vnode与mnode只是逻辑上的划分,都是执行程序taosd里的不同线程而已,无需安装不同的软件,做任何特殊的配置。最小的系统配置就是一个物理节点,vnode,mnode和taosc都存在而且都正常运行,但单一节点无法保证系统的高可靠。 - -### 2:一典型的操作流程 - 
-为解释vnode, mnode, taosc和应用之间的关系以及各自扮演的角色,下面对写入数据这个典型操作的流程进行剖析。 - - - -![Picture1](../assets/Picture2.png) - - - -1. 应用通过JDBC、ODBC或其他API接口发起插入数据的请求。 -2. taosc会检查缓存,看是有保存有该表的meta data。如果有,直接到第4步。如果没有,taosc将向mnode发出get meta-data请求。 -3. mnode将该表的meta-data返回给taosc。Meta-data包含有该表的schema, 而且还有该表所属的vgroup信息(vnode ID以及所在的dnode的IP地址,如果副本数为N,就有N组vnodeID/IP)。如果taosc迟迟得不到mnode回应,而且存在多个mnode,taosc将向下一个mnode发出请求。 -4. taosc向master vnode发起插入请求。 -5. vnode插入数据后,给taosc一个应答,表示插入成功。如果taosc迟迟得不到vnode的回应,taosc会认为该节点已经离线。这种情况下,如果被插入的数据库有多个副本,taosc将向vgroup里下一个vnode发出插入请求。 -6. taosc通知APP,写入成功。 - -对于第二和第三步,taosc启动时,并不知道mnode的IP地址,因此会直接向配置的集群对外服务的IP地址发起请求。如果接收到该请求的dnode并没有配置mnode,该dnode会在回复的消息中告知mnode的IP地址列表(如果有多个dnodes,mnode的IP地址可以有多个),这样taosc会重新向新的mnode的IP地址发出获取meta-data的请求。 - -对于第四和第五步,没有缓存的情况下,taosc无法知道虚拟节点组里谁是master,就假设第一个vnodeID/IP就是master,向它发出请求。如果接收到请求的vnode并不是master,它会在回复中告知谁是master,这样taosc就向建议的master vnode发出请求。一旦得到插入成功的回复,taosc会缓存住master节点的信息。 - -上述是插入数据的流程,查询、计算的流程也完全一致。taosc把这些复杂的流程全部封装屏蔽了,因此应用无需处理重定向、获取meta data等细节,完全是透明的。 - -通过taosc缓存机制,只有在第一次对一张表操作时,才需要访问mnode, 因此mnode不会成为系统瓶颈。但因为schema有可能变化,而且vgroup有可能发生改变(比如负载均衡发生),因此taosc需要定时自动刷新缓存。 - -### 3:数据分区 - -vnode(虚拟数据节点)保存采集的时序数据,而且查询、计算都在这些节点上进行。为便于负载均衡、数据恢复、支持异构环境,TDengine将一个物理节点根据其计算和存储资源切分为多个vnode。这些vnode的管理是TDengine自动完成的,对应用完全透明。 - -对于单独一个数据采集点,无论其数据量多大,一个vnode(或vnode group, 如果副本数大于1)有足够的计算资源和存储资源来处理(如果每秒生成一条16字节的记录,一年产生的原始数据不到0.5G),因此TDengine将一张表的所有数据都存放在一个vnode里,而不会让同一个采集点的数据分布到两个或多个dnode上。而且一个vnode可存储多张表的数据,一个vnode可容纳的表的数目由配置参数tables指定,缺省为2000。设计上,一个vnode里所有的表都属于同一个DB。因此一个数据库DB需要的vnode或vgroup的个数等于:数据库表的数目/tables。 - -创建DB时,系统并不会马上分配资源。但当创建一张表时,系统将看是否有已经分配的vnode, 而且是否有空位,如果有,立即在该有空位的vnode创建表。如果没有,系统将从集群中,根据当前的负载情况,在一个dnode上创建一新的vnode, 然后创建表。如果DB有多个副本,系统不是只创建一个vnode,而是一个vgroup(虚拟数据节点组)。系统对vnode的数目没有任何限制,仅仅受限于物理节点本身的计算和存储资源。 - -参数tables的设置需要考虑具体场景,创建DB时,可以个性化指定该参数。该参数不宜过大,也不宜过小。过小,极端情况,就是每个数据采集点一个vnode, 这样导致系统数据文件过多。过大,虚拟化带来的优势就会丧失。给定集群计算资源的情况下,整个系统vnode的个数应该是CPU核的数目的两倍以上。 - -### 4:负载均衡 - -每个dnode(物理节点)都定时向 mnode(虚拟管理节点)报告其状态(包括硬盘空间、内存大小、CPU、网络、虚拟节点个数等),因此mnode了解整个集群的状态。基于整体状态,当mnode发现某个dnode负载过重,它会将dnode上的一个或多个vnode挪到其他dnode。在挪动过程中,对外服务继续进行,数据插入、查询和计算操作都不受影响。负载均衡操作结束后,应用也无需重启,将自动连接新的vnode。 - -如果mnode一段时间没有收到dnode的状态报告,mnode会认为这个dnode已经离线。如果离线时间超过一定时长(时长由配置参数offlineThreshold决定),该dnode将被mnode强制剔除出集群。该dnode上的vnodes如果副本数大于一,系统将自动在其他dnode上创建新的副本,以保证数据的副本数。 - - - -**Note:**目前集群功能仅仅限于企业版 diff --git a/documentation20/webdocs/markdowndocs/More on System Architecture.md b/documentation20/webdocs/markdowndocs/More on System Architecture.md deleted file mode 100644 index d7a38b99a3ae5a630509f3ef0f0ffdc97d3aaaf1..0000000000000000000000000000000000000000 --- a/documentation20/webdocs/markdowndocs/More on System Architecture.md +++ /dev/null @@ -1,176 +0,0 @@ -# TDengine System Architecture - -## Storage Design - -TDengine data mainly include **metadata** and **data** that we will introduce in the following sections. - -### Metadata Storage - -Metadata include the information of databases, tables, etc. Metadata files are saved in _/var/lib/taos/mgmt/_ directory by default. The directory tree is as below: -``` -/var/lib/taos/ - +--mgmt/ - +--db.db - +--meters.db - +--user.db - +--vgroups.db -``` - -A metadata structure (database, table, etc.) is saved as a record in a metadata file. All metadata files are appended only, and even a drop operation adds a deletion record at the end of the file. - -### Data storage - -Data in TDengine are sharded according to the time range. 
Data of tables in the same vnode in a certain time range are saved in the same file group, such as the files v0f1804*. This sharding strategy can effectively improve data searching speed. By default, one group of files contains 10 days of data, which can be configured by *daysPerFile* in the configuration file or by the *DAYS* keyword in the *CREATE DATABASE* clause. Data in the files are organized in blocks. A data block contains only one table's data. Records in the same data block are sorted according to the primary timestamp, which helps to improve the compression rate and save storage. The compression algorithms used in TDengine include simple8B, delta-of-delta, RLE, LZ4, etc.
-
-By default, TDengine data are saved in the */var/lib/taos/data/* directory. The _/var/lib/taos/tsdb/_ directory contains vnode information and data file links.
-
-```
-/var/lib/taos/
-  +--tsdb/
-  |   +--vnode0
-  |        +--meterObj.v0
-  |        +--db/
-  |            +--v0f1804.head->/var/lib/taos/data/vnode0/v0f1804.head1
-  |            +--v0f1804.data->/var/lib/taos/data/vnode0/v0f1804.data
-  |            +--v0f1804.last->/var/lib/taos/data/vnode0/v0f1804.last1
-  |            +--v0f1805.head->/var/lib/taos/data/vnode0/v0f1805.head1
-  |            +--v0f1805.data->/var/lib/taos/data/vnode0/v0f1805.data
-  |            +--v0f1805.last->/var/lib/taos/data/vnode0/v0f1805.last1
-  |            :
-  +--data/
-      +--vnode0/
-          +--v0f1804.head1
-          +--v0f1804.data
-          +--v0f1804.last1
-          +--v0f1805.head1
-          +--v0f1805.data
-          +--v0f1805.last1
-          :
-```
-
-#### meterObj file
-There is only one meterObj file in a vnode. Information about the vnode, such as its creation time, configuration, and statistics, together with the information of the tables in the vnode, is saved in this file. It has the structure below:
-
-```
-<start of file>
-[file_header]
-[table_record1_offset&length]
-[table_record2_offset&length]
-...
-[table_recordN_offset&length]
-[table_record1]
-[table_record2]
-...
-[table_recordN]
-<end of file>
-```
-The file header takes 512 bytes and mainly contains information about the vnode. Each table record is the on-disk representation of a table in the vnode.
-
-#### head file
-The _head_ files contain the index of the data blocks in the _data_ file. The inner organization is as below:
-```
-<start of file>
-[file_header]
-[table1_offset]
-[table2_offset]
-...
-[tableN_offset]
-[table1_index_block]
-[table2_index_block]
-...
-[tableN_index_block]
-<end of file>
-```
-The table offset array at the beginning of the _head_ file stores the offset of each table's index block. Indices on data blocks of the same table are saved contiguously, which also makes it efficient to load all the data indices of one table at once. The data index block has a structure like:
-
-```
-[index_block_info]
-[block1_index]
-[block2_index]
-...
-[blockN_index]
-```
-The index block info part contains descriptive information about the index block, such as the number of block indices. Each block index corresponds to a real data block in the _data_ file or the _last_ file. Information about the location of the real data block, the primary timestamp range of the data block, etc. is saved in the block index part. The block indices are sorted in ascending order of the primary timestamp, so algorithms such as binary search can be applied to efficiently locate blocks by time.
-
-#### data file
-The _data_ files store the real data blocks. They are append-only. The organization is as below:
-```
-<start of file>
-[file_header]
-[block1]
-[block2]
-...
-[blockN]
-<end of file>
-```
-A data block in the _data_ files belongs to only one table in the vnode, and the records in a data block are sorted in ascending order of the primary timestamp key. Data blocks are column-oriented.
Data in the same column are stored contiguously, which improves the reading speed and the compression rate because of the similarity of the data. A data block has the following organization:
-
-```
-[column1_info]
-[column2_info]
-...
-[columnN_info]
-[column1_data]
-[column2_data]
-...
-[columnN_data]
-```
-The column info part includes information such as the column type, the column compression algorithm, and the offset and length of the column data in the _data_ file. Besides, pre-calculated results of the column data in the block are also kept in the column info part, which helps to improve the reading speed by avoiding loading data blocks unnecessarily.
-
-#### last file
-To avoid storage fragmentation and to improve the query speed and compression rate, TDengine introduces an extra file, the _last_ file. When the number of records in a data block to be flushed is lower than a threshold, TDengine will write the block to the _last_ file for temporary storage. When new data comes, the data in the _last_ file is merged with the new data to form a larger data block, which is then written to the _data_ file. The organization of the _last_ file is similar to that of the _data_ file.
-
-### Summary
-The innovations in the architecture and storage design of TDengine improve resource usage. On the one hand, virtualization makes it easy to distribute resources between different vnodes and to scale in the future. On the other hand, sorted and column-oriented storage gives TDengine a great advantage in writing, querying, and compression.
-
-## Query Design
-
-#### Introduction
-
-TDengine provides a variety of query functions for both tables and super tables. In addition to regular aggregate queries, it also provides time-window-based queries and statistical aggregation for time-series data. TDengine's query processing requires the client app, the management node, and the data node to work together. The functions and modules involved in query processing in each component are as follows:
-
-Client (Client App). The client development kit, embedded in a client application, consists of the TAOS SQL parser, the query executor, the second-stage aggregator (Result Merger), the continuous query manager, and other major functional modules. The SQL parser is responsible for parsing and verifying the SQL statement and converting it into an abstract syntax tree. The query executor is responsible for transforming the abstract syntax tree into query execution logic and, according to the query conditions of the SQL statement, splitting it into a metadata query against the management node and a data query against the data nodes. Since TAOS SQL does not currently include complex nested queries or a pipeline query processing mechanism, there is no need for query plan optimization or conversion from logical to physical query plans. The second-stage aggregator performs, at the client side, the aggregation of the independent results returned by the data nodes involved in the query to generate the final result. The continuous query manager is dedicated to managing the continuous queries created by users, including issuing fixed-interval query requests and writing the results back to TDengine or returning them to the client application as needed. The client is also responsible for retrying after a query fails, canceling query requests, maintaining the connection heartbeat, and reporting the query status to the management node.
-
-Management Node. The management node keeps the metadata of all the data of the entire cluster system, provides the client with the metadata required by a query, and divides query requests according to the load condition of the cluster.
A super table contains the information about all the tables created from it, so the query processor (Query Executor) of the management node is responsible for processing queries on the tags of tables and returns the information of the tables that satisfy the tag query. Besides, the management node maintains the query status of the cluster in the Query Status Manager component, in which the metadata of all currently executing queries is temporarily kept in an in-memory buffer. When a client issues the *show queries* command to the management node, the information about the currently running queries is returned to the client (see the short example after Fig 1 below).
-
-Data Node. The data node, responsible for storing all the data of the database, consists of the query executor, the query processing scheduler, the query task queue, and other related components. Once query requests from a client are received, they are put into the query task queue, waiting to be processed by the query executor. The query executor extracts a query request from the query task queue and invokes the query optimizer to perform basic optimization of the query execution plan. Then the query executor scans the qualified data blocks in both cache and disk to obtain the qualified data and returns the calculated results. Besides, the data node also needs to respond to management information and commands from the management node; for example, after a *kill query* command is received from the management node, the query task must be stopped immediately.
-
-Fig 1. System query processing architecture diagram (only query related components)
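-
-A hypothetical illustration of the *show queries* and *kill query* commands mentioned above (the query id below is a made-up value; use the ids reported by SHOW QUERIES, and check your version's documentation for the exact syntax):
-
-```sql
--- list the queries currently running in the cluster
-SHOW QUERIES;
--- stop one of them by its id
-KILL QUERY "1:3";
-```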
-
-#### Query Process Design
-
-The client, the management node, and the data node cooperate to complete the entire query processing of TDengine. Let's take a concrete SQL query as an example to illustrate the whole query processing flow. The SQL statement queries the super table *FOO_SUPER_TABLE* for the total number of records generated on January 12, 2019, in the tables whose tag TAG_LOC equals 'beijing'. The SQL statement is as follows:
-
-```sql
-SELECT COUNT(*)
-FROM FOO_SUPER_TABLE
-WHERE TAG_LOC = 'beijing' AND TS >= '2019-01-12 00:00:00' AND TS < '2019-01-13 00:00:00'
-```
-
-First, the client invokes the TAOS SQL parser to parse and validate the SQL statement, generates the syntax tree, and extracts the object of the query, the super table *FOO_SUPER_TABLE*. The parser then sends a request with the filtering information (TAG_LOC='beijing') to the management node to get the corresponding metadata about *FOO_SUPER_TABLE*.
-
-Once the management node receives the request for metadata acquisition, it first finds the basic information of the super table *FOO_SUPER_TABLE*, then applies the query condition (TAG_LOC='beijing') to filter all the tables created from it. Finally, the query executor returns the metadata of the tables that satisfy the query condition to the client.
-
-After the client obtains the metadata of *FOO_SUPER_TABLE*, the query executor initiates a query request with the timestamp range filtering condition (TS >= '2019-01-12 00:00:00' AND TS < '2019-01-13 00:00:00') to all the nodes that hold the corresponding data, according to the data distribution information in the metadata.
-
-A data node receives the query sent from the client, converts it into an internal structure, optimizes the execution plan, and puts the query into the query task queue to be executed by the query executor. When the query result is obtained, it is returned to the client. Note that the data nodes execute the query independently of each other, relying solely on their own data and content for processing.
-
-When all the data nodes involved in the query have returned their results, the client aggregates the result sets from the data nodes. In this case, all the results are accumulated to generate the final query result. The second-stage aggregation is not required for every query. For example, a column selection query does not require a second-stage aggregation at all.
-
-#### REST Query Process
-
-In addition to the C/C++, Python, and JDBC interfaces, TDengine also provides a REST interface based on the HTTP protocol, which differs from development with a client application programming interface. When the user uses the REST interface, all the query processing is completed on the server side, and the user's application is not involved in the query processing at all. After the query processing is completed, the result is returned to the client as a JSON string over HTTP.
-
-Fig 2. REST query architecture
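-
-For instance, the body of a REST request is just an ordinary SQL statement such as the one below (the table name is hypothetical); the server executes it and returns the result as a JSON string in the HTTP response:
-
-```sql
-SELECT COUNT(*) FROM foo_table WHERE ts >= now - 1h;
-```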
-
-When a client uses the HTTP-based REST query interface, it first establishes a connection with the HTTP connector at a data node, and then uses a token, through the REST signature mechanism, to ensure the reliability of the request. After receiving the request, the HTTP connector at the data node invokes the embedded client program to initiate the query processing. The embedded client parses the SQL statement passed from the HTTP connector, requests the management node for metadata as needed, and then sends query requests to the same data node or to other nodes in the cluster, aggregating the calculation results on demand. Finally, the query result is converted into a JSON format string and returned to the client via an HTTP response. In other words, once the HTTP connector receives the SQL request, the subsequent processing is completely consistent with the query processing that uses the client application development kit.
-
-It should be noted that during the entire processing, the client application is no longer involved; it is only responsible for sending the SQL request through the HTTP protocol and receiving the result in JSON format. Besides, each data node embeds an HTTP connector and a client, so a client can send its request to any data node in the cluster, and that data node can initiate the query and return the result to the client through the HTTP protocol.
-
-#### Technology
-
-Because TDengine stores data and tag values separately, the tag values are kept in the management node and directly associated with each table rather than with the records, which greatly reduces the storage. The tag values can therefore be managed in a fully in-memory structure. First, filtering on the tag data can drastically reduce the data size involved in the second phase of the query. The query processing of the data itself is performed at the data nodes. TDengine takes advantage of the immutable nature of IoT data by calculating the maximum, minimum, and other statistics of the data in each saved data block, to effectively improve the performance of query processing. If the query involves all the data of an entire data block, the pre-computed results are used directly, and the content of the data block is not read at all. Since the disk space required to store the pre-computed results is much smaller than that of the actual data, using them greatly reduces the disk IO and speeds up query processing.
-
-TDengine employs column-oriented data storage techniques. When a data block has to be loaded from disk for calculation, only the required columns are read according to the query conditions, so the read overhead is minimized. The data of one column is stored in a contiguous block and therefore can make full use of the CPU L2 cache to greatly speed up data scanning. Besides, TDengine utilizes an eager-response mechanism and returns partial results before the complete result is acquired. For example, in a column selection query, the data node returns the first batch of results directly to the client as soon as it is obtained.
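-
-As a hedged example of the pre-computation described above: for an aggregate like the following, any data block that falls entirely inside the queried time range can be answered from its stored max/min statistics without reading the block payload (the table and column names are illustrative only):
-
-```sql
-SELECT MAX(degree), MIN(degree)
-FROM thermometer
-WHERE ts >= '2019-01-01 00:00:00' AND ts < '2019-02-01 00:00:00';
-```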
\ No newline at end of file diff --git a/documentation20/webdocs/markdowndocs/Super Table-ch.md b/documentation20/webdocs/markdowndocs/Super Table-ch.md deleted file mode 100644 index e5c77471570a76e608d59a0dca10462315460337..0000000000000000000000000000000000000000 --- a/documentation20/webdocs/markdowndocs/Super Table-ch.md +++ /dev/null @@ -1,224 +0,0 @@ -# 超级表STable:多表聚合 - -TDengine要求每个数据采集点单独建表,这样能极大提高数据的插入/查询性能,但是导致系统中表的数量猛增,让应用对表的维护以及聚合、统计操作难度加大。为降低应用的开发难度,TDengine引入了超级表STable (Super Table)的概念。 - -## 什么是超级表 - -STable是同一类型数据采集点的抽象,是同类型采集实例的集合,包含多张数据结构一样的子表。每个STable为其子表定义了表结构和一组标签:表结构即表中记录的数据列及其数据类型;标签名和数据类型由STable定义,标签值记录着每个子表的静态信息,用以对子表进行分组过滤。子表本质上就是普通的表,由一个时间戳主键和若干个数据列组成,每行记录着具体的数据,数据查询操作与普通表完全相同;但子表与普通表的区别在于每个子表从属于一张超级表,并带有一组由STable定义的标签值。每种类型的采集设备可以定义一个STable。数据模型定义表的每列数据的类型,如温度、压力、电压、电流、GPS实时位置等,而标签信息属于Meta Data,如采集设备的序列号、型号、位置等,是静态的,是表的元数据。用户在创建表(数据采集点)时指定STable(采集类型)外,还可以指定标签的值,也可事后增加或修改。 - -TDengine扩展标准SQL语法用于定义STable,使用关键词tags指定标签信息。语法如下: - -```mysql -CREATE TABLE ( TIMESTAMP, field_name1 field_type,…) TAGS(tag_name tag_type, …) -``` - -其中tag_name是标签名,tag_type是标签的数据类型。标签可以使用时间戳之外的其他TDengine支持的数据类型,标签的个数最多为6个,名字不能与系统关键词相同,也不能与其他列名相同。如: - -```mysql -create table thermometer (ts timestamp, degree float) -tags (location binary(20), type int) -``` - -上述SQL创建了一个名为thermometer的STable,带有标签location和标签type。 - -为某个采集点创建表时,可以指定其所属的STable以及标签的值,语法如下: - -```mysql -CREATE TABLE USING TAGS (tag_value1,...) -``` - -沿用上面温度计的例子,使用超级表thermometer建立单个温度计数据表的语句如下: - -```mysql -create table t1 using thermometer tags (‘beijing’, 10) -``` - -上述SQL以thermometer为模板,创建了名为t1的表,这张表的Schema就是thermometer的Schema,但标签location值为‘beijing’,标签type值为10。 - -用户可以使用一个STable创建数量无上限的具有不同标签的表,从这个意义上理解,STable就是若干具有相同数据模型,不同标签的表的集合。与普通表一样,用户可以创建、删除、查看超级表STable,大部分适用于普通表的查询操作都可运用到STable上,包括各种聚合和投影选择函数。除此之外,可以设置标签的过滤条件,仅对STbale中部分表进行聚合查询,大大简化应用的开发。 - -TDengine对表的主键(时间戳)建立索引,暂时不提供针对数据模型中其他采集量(比如温度、压力值)的索引。每个数据采集点会采集若干数据记录,但每个采集点的标签仅仅是一条记录,因此数据标签在存储上没有冗余,且整体数据规模有限。TDengine将标签数据与采集的动态数据完全分离存储,而且针对STable的标签建立了高性能内存索引结构,为标签提供全方位的快速操作支持。用户可按照需求对其进行增删改查(Create,Retrieve,Update,Delete,CRUD)操作。 - -STable从属于库,一个STable只属于一个库,但一个库可以有一到多个STable, 一个STable可有多个子表。 - -## 超级表管理 - -- 创建超级表 - - ```mysql - CREATE TABLE ( TIMESTAMP, field_name1 field_type,…) TAGS(tag_name tag_type, …) - ``` - - 与创建表的SQL语法相似。但需指定TAGS字段的名称和类型。 - - 说明: - - 1. TAGS列总长度不能超过512 bytes; - 2. TAGS列的数据类型不能是timestamp和nchar类型; - 3. TAGS列名不能与其他列名相同; - 4. TAGS列名不能为预留关键字. - -- 显示已创建的超级表 - - ```mysql - show stables; - ``` - - 查看数据库内全部STable,及其相关信息,包括STable的名称、创建时间、列数量、标签(TAG)数量、通过该STable建表的数量。 - -- 删除超级表 - - ```mysql - DROP TABLE - ``` - - Note: 删除STable不会级联删除通过STable创建的表;相反删除STable时要求通过该STable创建的表都已经被删除。 - -- 查看属于某STable并满足查询条件的表 - - ```mysql - SELECT TBNAME,[TAG_NAME,…] FROM WHERE <[=|=<|>=|<>] values..> ([AND|OR] …) - ``` - - 查看属于某STable并满足查询条件的表。说明:TBNAME为关键词,显示通过STable建立的子表表名,查询过程中可以使用针对标签的条件。 - - ```mysql - SELECT COUNT(TBNAME) FROM WHERE <[=|=<|>=|<>] values..> ([AND|OR] …) - ``` - - 统计属于某个STable并满足查询条件的子表的数量 - -## 写数据时自动建子表 - -在某些特殊场景中,用户在写数据时并不确定某个设备的表是否存在,此时可使用自动建表语法来实现写入数据时里用超级表定义的表结构自动创建不存在的子表,若该表已存在则不会建立新表。注意:自动建表语句只能自动建立子表而不能建立超级表,这就要求超级表已经被事先定义好。自动建表语法跟insert/import语法非常相似,唯一区别是语句中增加了超级表和标签信息。具体语法如下: - -```mysql -INSERT INTO USING TAGS (, ...) VALUES (field_value, ...) (field_value, ...) ...; -``` - -向表tb_name中插入一条或多条记录,如果tb_name这张表不存在,则会用超级表stb_name定义的表结构以及用户指定的标签值(即tag1_value…)来创建名为tb_name新表,并将用户指定的值写入表中。如果tb_name已经存在,则建表过程会被忽略,系统也不会检查tb_name的标签是否与用户指定的标签值一致,也即不会更新已存在表的标签。 - -```mysql -INSERT INTO USING TAGS (, ...) VALUES (, ...) (, ...) ... USING TAGS(, ...) 
VALUES (, ...) ...; -``` - -向多张表tb1_name,tb2_name等插入一条或多条记录,并分别指定各自的超级表进行自动建表。 - -## STable中TAG管理 - -除了更新标签的值的操作是针对子表进行,其他所有的标签操作(添加标签、删除标签等)均只能作用于STable,不能对单个子表操作。对STable添加标签以后,依托于该STable建立的所有表将自动增加了一个标签,对于数值型的标签,新增加的标签的默认值是0. - -- 添加新的标签 - - ```mysql - ALTER TABLE ADD TAG - ``` - - 为STable增加一个新的标签,并指定新标签的类型。标签总数不能超过6个。 - -- 删除标签 - - ```mysql - ALTER TABLE DROP TAG - ``` - - 删除超级表的一个标签,从超级表删除某个标签后,该超级表下的所有子表也会自动删除该标签。 - - 说明:第一列标签不能删除,至少需要为STable保留一个标签。 - -- 修改标签名 - - ```mysql - ALTER TABLE CHANGE TAG - ``` - - 修改超级表的标签名,从超级表修改某个标签名后,该超级表下的所有子表也会自动更新该标签名。 - -- 修改子表的标签值 - - ```mysql - ALTER TABLE SET TAG = - ``` - -## STable多表聚合 - -针对所有的通过STable创建的子表进行多表聚合查询,支持按照全部的TAG值进行条件过滤,并可将结果按照TAGS中的值进行聚合,暂不支持针对binary类型的模糊匹配过滤。语法如下: - -```mysql -SELECT function,… - FROM - WHERE <[=|<=|>=|<>] values..> ([AND|OR] …) - INTERVAL ( [, offset]) - GROUP BY , … - ORDER BY - SLIMIT - SOFFSET - LIMIT - OFFSET -``` - -**说明**: - -超级表聚合查询,TDengine目前支持以下聚合\选择函数:sum、count、avg、first、last、min、max、top、bottom,以及针对全部或部分列的投影操作,使用方式与单表查询的计算过程相同。暂不支持其他类型的聚合计算和四则运算。当前所有的函数及计算过程均不支持嵌套的方式进行执行。 - - 不使用GROUP BY的查询将会对超级表下所有满足筛选条件的表按时间进行聚合,结果输出默认是按照时间戳单调递增输出,用户可以使用ORDER BY _c0 ASC|DESC选择查询结果时间戳的升降排序;使用GROUP BY 的聚合查询会按照tags进行分组,并对每个组内的数据分别进行聚合,输出结果为各个组的聚合结果,组间的排序可以由ORDER BY 语句指定,每个分组内部,时间序列是单调递增的。 - -使用SLIMIT/SOFFSET语句指定组间分页,即指定结果集中输出的最大组数以及对组起始的位置。使用LIMIT/OFFSET语句指定组内分页,即指定结果集中每个组内最多输出多少条记录以及记录起始的位置。 - -## STable使用示例 - -以温度传感器采集时序数据作为例,示范STable的使用。 在这个例子中,对每个温度计都会建立一张表,表名为温度计的ID,温度计读数的时刻记为ts,采集的值记为degree。通过tags给每个采集器打上不同的标签,其中记录温度计的地区和类型,以方便我们后面的查询。所有温度计的采集量都一样,因此我们用STable来定义表结构。 - -###定义STable表结构并使用它创建子表 - -创建STable语句如下: - -```mysql -CREATE TABLE thermometer (ts timestamp, degree double) -TAGS(location binary(20), type int) -``` - -假设有北京,天津和上海三个地区的采集器共4个,温度采集器有3种类型,我们就可以对每个采集器建表如下: - -```mysql -CREATE TABLE therm1 USING thermometer TAGS (’beijing’, 1); -CREATE TABLE therm2 USING thermometer TAGS (’beijing’, 2); -CREATE TABLE therm3 USING thermometer TAGS (’tianjin’, 1); -CREATE TABLE therm4 USING thermometer TAGS (’shanghai’, 3); -``` - -其中therm1,therm2,therm3,therm4是超级表thermometer四个具体的子表,也即普通的Table。以therm1为例,它表示采集器therm1的数据,表结构完全由thermometer定义,标签location=”beijing”, type=1表示therm1的地区是北京,类型是第1类的温度计。 - -###写入数据 - -注意,写入数据时不能直接对STable操作,而是要对每张子表进行操作。我们分别向四张表therm1,therm2, therm3, therm4写入一条数据,写入语句如下: - -```mysql -INSERT INTO therm1 VALUES (’2018-01-01 00:00:00.000’, 20); -INSERT INTO therm2 VALUES (’2018-01-01 00:00:00.000’, 21); -INSERT INTO therm3 VALUES (’2018-01-01 00:00:00.000’, 24); -INSERT INTO therm4 VALUES (’2018-01-01 00:00:00.000’, 23); -``` - -###按标签聚合查询 - -查询位于北京(beijing)和天津(tianjing)两个地区的温度传感器采样值的数量count(*)、平均温度avg(degree)、最高温度max(degree)、最低温度min(degree),并将结果按所处地域(location)和传感器类型(type)进行聚合。 - -```mysql -SELECT COUNT(*), AVG(degree), MAX(degree), MIN(degree) -FROM thermometer -WHERE location=’beijing’ or location=’tianjing’ -GROUP BY location, type -``` - -###按时间周期聚合查询 - -查询仅位于北京以外地区的温度传感器最近24小时(24h)采样值的数量count(*)、平均温度avg(degree)、最高温度max(degree)和最低温度min(degree),将采集结果按照10分钟为周期进行聚合,并将结果按所处地域(location)和传感器类型(type)再次进行聚合。 - -```mysql -SELECT COUNT(*), AVG(degree), MAX(degree), MIN(degree) -FROM thermometer -WHERE name<>’beijing’ and ts>=now-1d -INTERVAL(10M) -GROUP BY location, type -``` \ No newline at end of file diff --git a/documentation20/webdocs/markdowndocs/Super Table.md b/documentation20/webdocs/markdowndocs/Super Table.md deleted file mode 100644 index a213567f6d67ed351fac67b821f4db1929fa3a22..0000000000000000000000000000000000000000 --- a/documentation20/webdocs/markdowndocs/Super Table.md +++ /dev/null @@ 
-1,195 +0,0 @@
-# STable: Super Table
-
-The "One Table for One Device" design can improve the insert/query performance significantly for a single device, but it has a side effect: aggregation across multiple tables becomes hard. To reduce the complexity and improve the efficiency, TDengine introduced a new concept: STable (Super Table).
-
-## What is a Super Table
-
-A STable is an abstraction of, and a template for, a type of device. A STable contains a set of devices (tables) that have the same schema or data structure. Besides the shared schema, a STable has a set of tags, such as the model, the serial number, and so on. Tags are used to record the static attributes of the devices and to group a set of devices (tables) for aggregation. Tags are metadata of a table and can be added, deleted, or changed.
-
-TDengine does not save tags as a part of the data points collected. Instead, tags are saved as metadata. Each table has a set of tags. To improve query performance, tags are all cached and indexed. One table can only belong to one STable, but one STable may contain many tables.
-
-Like a table, you can create, show, delete, and describe STables. Most query operations on tables can be applied to STables too, including the aggregation and selector functions. For a query on a STable without a tag filter, the operations are applied to all the tables created via this STable. If there is a tag filter, the operations are applied only to the subset of the tables which satisfy the tag filter conditions. It is very convenient to use tags to put devices into different groups for aggregation.
-
-## Create a STable
-
-Similar to creating a standard table, the syntax is:
-
-```mysql
-CREATE TABLE <stable_name> (<field_name> TIMESTAMP, field_name1 field_type, …) TAGS(tag_name tag_type, …)
-```
-
-The new keyword "tags" is introduced, where tag_name is the tag name and tag_type is the associated data type.
-
-Note:
-
-1. The bytes of all tags together shall be less than 512
-2. A tag's data type can not be timestamp or nchar
-3. A tag name shall be different from the field names
-4. A tag name shall not be the same as system keywords
-5. The maximum number of tags is 6
-
-For example:
-
-```mysql
-create table thermometer (ts timestamp, degree float)
-tags (location binary(20), type int)
-```
-
-The above statement creates a STable thermometer with two tags, "location" and "type".
-
-## Create a Table via STable
-
-To create a table for a device, you can use a STable as its template and assign the tag values. The syntax is:
-
-```mysql
-CREATE TABLE <tb_name> USING <stb_name> TAGS (tag_value1,...)
-```
-
-You can create any number of tables via a STable, and each table may have different tag values. For example, you can create five tables via the STable thermometer as below:
-
-```mysql
- create table t1 using thermometer tags ('beijing', 10);
- create table t2 using thermometer tags ('beijing', 20);
- create table t3 using thermometer tags ('shanghai', 10);
- create table t4 using thermometer tags ('shanghai', 20);
- create table t5 using thermometer tags ('new york', 10);
-```
-
-## Aggregate Tables via STable
-
-You can group a set of tables together by specifying a tag filter condition, then apply the aggregation operations. The result set can be grouped and ordered based on tag values. The syntax is:
-
-```mysql
-SELECT function,…
-  FROM <stable_name>
-  WHERE <tag_name> <[=|<=|>=|<>] values..> ([AND|OR] …)
-  INTERVAL (