Merge branch 'develop' into hotfix/TD-1429

0ba880d1 · Hongze Cheng · 608cd044 · b604ed13 · 0ba880d1 · 0ba880d1
64 changed file
--- a/.travis.yml
+++ b/.travis.yml
@@ -54,6 +54,7 @@ matrix:

          py3ver=`python3 --version|awk '{print $2}'|cut -d "." -f 1,2` && apt install python$py3ver-dev
          pip3 install psutil
+          pip3 install guppy3
          pip3 install --user ${TRAVIS_BUILD_DIR}/src/connector/python/linux/python3/

          cd ${TRAVIS_BUILD_DIR}/tests

--- a/cmake/version.inc
+++ b/cmake/version.inc
@@ -4,7 +4,7 @@ PROJECT(TDengine)
 IF (DEFINED VERNUMBER)
  SET(TD_VER_NUMBER ${VERNUMBER})
 ELSE ()
-  SET(TD_VER_NUMBER "2.0.4.0")
+  SET(TD_VER_NUMBER "2.0.5.1")
 ENDIF ()

 IF (DEFINED VERCOMPATIBLE)

--- a/documentation20/webdocs/markdowndocs/Evaluation-ch.md
+++ b/documentation20/webdocs/markdowndocs/Evaluation-ch.md
@@ -11,7 +11,7 @@ TDengine的模块之一是时序数据库。但除此之外，为减少研发的
 * __全栈时序数据处理引擎__：将数据库、消息队列、缓存、流式计算等功能融为一体，应用无需再集成Kafka/Redis/HBase/Spark/HDFS等软件，大幅降低应用开发和维护的复杂度成本。 
 * __强大的分析功能__：无论是十年前还是一秒钟前的数据，指定时间范围即可查询。数据可在时间轴上或多个设备上进行聚合。即席查询可通过Shell, Python, R, Matlab随时进行。
 * __与第三方工具无缝连接__：不用一行代码，即可与Telegraf, Grafana, EMQ, Prometheus, Matlab, R等集成。后续将支持OPC, Hadoop, Spark等, BI工具也将无缝连接。
-* __零运维成本、零学习成本__：安装、集群一秒搞定，无需分库分表，实时备份。标准SQL，支持JDBC, RESTful, 支持Python/Java/C/C++/Go, 与MySQL相似，零学习成本。
+* __零运维成本、零学习成本__：安装集群简单快捷，无需分库分表，实时备份。类似标准SQL，支持RESTful, 支持Python/Java/C/C++/C#/Go/Node.js, 与MySQL相似，零学习成本。

 采用TDengine，可将典型的物联网、车联网、工业互联网大数据平台的总拥有成本大幅降低。但需要指出的是，因充分利用了物联网时序数据的特点，它无法用来处理网络爬虫、微博、微信、电商、ERP、CRM等通用型数据。


--- a/documentation20/webdocs/markdowndocs/Getting Started-ch.md
+++ b/documentation20/webdocs/markdowndocs/Getting Started-ch.md
@@ -30,13 +30,13 @@ TDengine软件分为服务器、客户端和报警模块三部分，目前2.0版

 - TDengine-alert-2.0.0-Linux-x64.tar.gz (8.1M)

-目前，TDengine只支持在使用[`systemd`](https://en.wikipedia.org/wiki/Systemd)做进程服务管理的linux系统上安装。其他linux系统的支持正在开发中。用`which`命令来检测系统中是否存在`systemd`:
+目前，TDengine只支持在使用[`systemd`](https://en.wikipedia.org/wiki/Systemd)做进程服务管理的linux系统上安装。其他linux系统的支持正在开发中。用`which systemctl`命令来检测系统中是否存在`systemd`包:

 ```cmd
-which systemd
+which systemctl
 ```

-如果系统中不存在`systemd`命令，请考虑[通过源码安装](#通过源码安装)TDengine。
+如果系统中不存在`systemd`包，请考虑[通过源码安装](#通过源码安装)TDengine。

 具体的安装过程，请参见<a href="https://www.taosdata.com/blog/2019/08/09/566.html">TDengine多种安装包的安装和卸载</a>。


--- a/documentation20/webdocs/markdowndocs/Getting Started.md
+++ b/documentation20/webdocs/markdowndocs/Getting Started.md
@@ -16,13 +16,13 @@ Three different packages are provided, please pick up the one you like.
 <li><a id='tdengine-deb' style='color:var(--b2)'>TDengine DEB package (1.7M)</a></li>
 <li><a id='tdengine-tar' style='color:var(--b2)'>TDengine Tarball (3.0M)</a></li>
 </ul>
-For the time being, TDengine only supports installation on Linux systems using [`systemd`](https://en.wikipedia.org/wiki/Systemd) as the service manager. To check if your system has *systemd*, use the _which_ command.
+For the time being, TDengine only supports installation on Linux systems using [`systemd`](https://en.wikipedia.org/wiki/Systemd) as the service manager. To check whether your system has the *systemd* package, use the _which systemctl_ command.

 ```cmd
-which systemd
+which systemctl
 ```

-If the `systemd` command is not found, please [install from source code](#Install-from-Source). 
+If the `systemd` package is not found, please [install from source code](#Install-from-Source). 

 ### Running TDengine


--- a/documentation20/webdocs/markdowndocs/Queries-ch.md
+++ b/documentation20/webdocs/markdowndocs/Queries-ch.md
@@ -29,23 +29,9 @@ Query OK, 2 row(s) in set (0.001100s)
 具体的查询语法请看<a href="https://www.taosdata.com/cn/documentation20/taos-sql/">TAOS SQL </a>。

 ## 多表聚合查询
+物联网场景中，往往同一个类型的数据采集点有多个。TDengine采用超级表(STable)的概念来描述某一个类型的数据采集点，一张普通的表来描述一个具体的数据采集点。同时TDengine使用标签来描述数据采集点的静态属性，一个具体的数据采集点有具体的标签值。通过指定标签的过滤条件，TDengine提供了一高效的方法将超级表(某一类型的数据采集点)所属的子表进行聚合查询。对普通表的聚合函数以及绝大部分操作都适用于超级表，语法完全一样。  

-TDengine对每个数据采集点单独建表，但在实际应用中经常需要对不同的采集点数据进行聚合。为高效的进行聚合操作，TDengine引入超级表（STable）的概念。超级表用来代表一特定类型的数据采集点，它是包含多张表的表集合，集合里每张表的模式（schema）完全一致，但每张表都带有自己的静态标签，标签可以多个，可以随时增加、删除和修改。
-
-应用可通过指定标签的过滤条件，对一个STable下的全部或部分表进行聚合或统计操作，这样大大简化应用的开发。其具体流程如下图所示：
-
-<center> <img src="../assets/stable.png"> </center>
-
-<center> 多表聚合查询原理图  </center>
-
-1：应用将一个查询条件发往系统；2: taosc将超级表的名字发往 Meta Node（管理节点)；3：管理节点将超级表所拥有的 vnode 列表发回 taosc；4：taosc将计算的请求连同标签过滤条件发往这些vnode对应的多个数据节点；5：每个vnode先在内存里查找出自己节点里符合标签过滤条件的表的集合，然后扫描存储的时序数据，完成相应的聚合计算，将结果返回给taosc；6：taosc将多个数据节点返回的结果做最后的聚合，将其返回给应用。
-
-由于TDengine在vnode内将标签数据与时序数据分离存储，通过先在内存里过滤标签数据，将需要扫描的数据集大幅减少，大幅提升聚合计算速度。同时，由于数据分布在多个vnode/dnode，聚合计算操作在多个vnode里并发进行，又进一步提升了聚合的速度。
-
-对普通表的聚合函数以及绝大部分操作都适用于超级表，语法完全一样，细节请看 TAOS SQL。
-
-比如：在TAOS Shell，查找所有智能电表采集的电压平均值，并按照location分组
-
+**示例1**：在TAOS Shell，查找所有智能电表采集的电压平均值，并按照location分组
 ```mysql
 taos> SELECT AVG(voltage) FROM meters GROUP BY location;
       avg(voltage)        |            location            |
@@ -55,6 +41,18 @@ taos> SELECT AVG(voltage) FROM meters GROUP BY location;
 Query OK, 2 row(s) in set (0.002136s)
 ```

+**示例2**：在TAOS shell, 查找groupId为2的所有智能电表过去24小时的记录条数，电流的最大值
+
+```mysql
+taos> SELECT count(*), max(current) FROM meters where groupId = 2 and ts > now - 24h;
+     count(*) |    max(current)  |
+==================================
+            5 |             13.4 |
+Query OK, 1 row(s) in set (0.002136s)
+```
+
+TDengine仅容许对属于同一个超级表的表之间进行聚合查询，不同超级表之间的聚合查询不支持。在<a href="https://www.taosdata.com/cn/documentation20/taos-sql/">TAOS SQL </a>一章，查询类操作都会注明是否支持超级表。
+
 ## 降采样查询、插值

 物联网场景里，经常需要通过降采样（down sampling）将采集的数据按时间段进行聚合。TDengine 提供了一个简便的关键词 interval 让按照时间窗口的查询操作变得极为简单。比如，将智能电表 d1001 采集的电流值每10秒钟求和
@@ -66,9 +64,9 @@ taos> SELECT sum(current) FROM d1001 INTERVAL(10s);
 2018-10-03 14:38:10.000 |              24.900000572 |
 Query OK, 2 row(s) in set (0.000883s)
 ```
-降采样操作也适用于超级表，比如：将所有智能电表采集的电流值每秒钟求和
+降采样操作也适用于超级表，比如：将北京所有智能电表采集的电流值每秒钟求和
 ```mysql
-taos> SELECT SUM(current) FROM meters INTERVAL(1s);
+taos> SELECT SUM(current) FROM meters where location like "Beijing%" INTERVAL(1s);
           ts            |       sum(current)        |
 ======================================================
 2018-10-03 14:38:04.000 |              10.199999809 |

--- a/documentation20/webdocs/markdowndocs/TAOS SQL-ch.md
+++ b/documentation20/webdocs/markdowndocs/TAOS SQL-ch.md
@@ -125,6 +125,7 @@ TDengine缺省的时间戳是毫秒精度，但通过修改配置参数enableMic
    1) 表的第一个字段必须是TIMESTAMP，并且系统自动将其设为主键；
    2) 表名最大长度为193；
    3) 表的每行长度不能超过16k个字符;
+    4) 子表名只能由字母、数字和下划线组成，且不能以数字开头；
    5) 使用数据类型binary或nchar，需指定其最长的字节数，如binary(20)，表示20字节；

 - **删除数据表**

--- a/documentation20/webdocs/markdowndocs/administrator-ch.md
+++ b/documentation20/webdocs/markdowndocs/administrator-ch.md
@@ -82,8 +82,7 @@ TDengine系统后台服务由taosd提供，可以在配置文件taos.cfg里修

 下面仅仅列出一些重要的配置参数，更多的参数请看配置文件里的说明。各个参数的详细介绍及作用请看前述章节，而且这些参数的缺省配置都是工作的，一般无需设置。**注意：配置修改后，需要重启*taosd*服务才能生效。**

- firstEp: taosd启动时，主动连接的集群中第一个dnode的end point, 默认值为localhost:6030。
- secondEp: taosd启动时，如果first连接不上，尝试连接集群中第二个dnode的end point, 默认值为空。
+- firstEp: taosd启动时，主动连接的集群中首个dnode的end point, 默认值为localhost:6030。
 - fqdn：数据节点的FQDN，缺省为操作系统配置的第一个hostname。如果习惯IP地址访问，可设置为该节点的IP地址。
 - serverPort：taosd启动后，对外服务的端口号，默认值为6030。
 - httpPort: RESTful服务使用的端口号，所有的HTTP请求（TCP）都需要向该接口发起查询/写入请求, 默认值为6041。
@@ -156,76 +155,80 @@ TDengine系统的前台交互客户端应用程序为taos，它与taosd共享同
 客户端配置参数

 - firstEp: taos启动时，主动连接的集群中第一个taosd实例的end point, 缺省值为 localhost:6030。
- secondEp: taos启动时，如果first连接不上，尝试连接集群中第二个taosd实例的end point, 缺省值为空。
 - locale

-    > 默认值：系统中动态获取，如果自动获取失败，需要用户在配置文件设置或通过API设置
+    默认值：系统中动态获取，如果自动获取失败，需要用户在配置文件设置或通过API设置
    
-TDengine为存储中文、日文、韩文等非ASCII编码的宽字符，提供一种专门的字段类型nchar。写入nchar字段的数据将统一采用UCS4-LE格式进行编码并发送到服务器。需要注意的是，编码正确性是客户端来保证。因此，如果用户想要正常使用nchar字段来存储诸如中文、日文、韩文等非ASCII字符，需要正确设置客户端的编码格式。
+    TDengine为存储中文、日文、韩文等非ASCII编码的宽字符，提供一种专门的字段类型nchar。写入nchar字段的数据将统一采用UCS4-LE格式进行编码并发送到服务器。需要注意的是，编码正确性是客户端来保证。因此，如果用户想要正常使用nchar字段来存储诸如中文、日文、韩文等非ASCII字符，需要正确设置客户端的编码格式。

-客户端的输入的字符均采用操作系统当前默认的编码格式，在Linux系统上多为UTF-8，部分中文系统编码则可能是GB18030或GBK等。在docker环境中默认的编码是POSIX。在中文版Windows系统中，编码则是CP936。客户端需要确保正确设置自己所使用的字符集，即客户端运行的操作系统当前编码字符集，才能保证nchar中的数据正确转换为UCS4-LE编码格式。
+    客户端的输入的字符均采用操作系统当前默认的编码格式，在Linux系统上多为UTF-8，部分中文系统编码则可能是GB18030或GBK等。在docker环境中默认的编码是POSIX。在中文版Windows系统中，编码则是CP936。客户端需要确保正确设置自己所使用的字符集，即客户端运行的操作系统当前编码字符集，才能保证nchar中的数据正确转换为UCS4-LE编码格式。

-在 Linux 中 locale 的命名规则为: <语言>_<地区>.<字符集编码> 如：zh_CN.UTF-8，zh代表中文，CN代表大陆地区，UTF-8表示字符集。字符集编码为客户端正确解析本地字符串提供编码转换的说明。Linux系统与 Mac OSX 系统可以通过设置locale来确定系统的字符编码，由于Windows使用的locale中不是POSIX标准的locale格式，因此在Windows下需要采用另一个配置参数charset来指定字符编码。在Linux 系统中也可以使用charset来指定字符编码。
+    在 Linux 中 locale 的命名规则为: <语言>_<地区>.<字符集编码> 如：zh_CN.UTF-8，zh代表中文，CN代表大陆地区，UTF-8表示字符集。字符集编码为客户端正确解析本地字符串提供编码转换的说明。Linux系统与 Mac OSX 系统可以通过设置locale来确定系统的字符编码，由于Windows使用的locale中不是POSIX标准的locale格式，因此在Windows下需要采用另一个配置参数charset来指定字符编码。在Linux 系统中也可以使用charset来指定字符编码。

 - charset

-    > 默认值：系统中动态获取，如果自动获取失败，需要用户在配置文件设置或通过API设置
+    默认值：系统中动态获取，如果自动获取失败，需要用户在配置文件设置或通过API设置
    
-如果配置文件中不设置charset，在Linux系统中，taos在启动时候，自动读取系统当前的locale信息，并从locale信息中解析提取charset编码格式。如果自动读取locale信息失败，则尝试读取charset配置，如果读取charset配置也失败，则中断启动过程。
+    如果配置文件中不设置charset，在Linux系统中，taos在启动时候，自动读取系统当前的locale信息，并从locale信息中解析提取charset编码格式。如果自动读取locale信息失败，则尝试读取charset配置，如果读取charset配置也失败，则中断启动过程。

-在Linux系统中，locale信息包含了字符编码信息，因此正确设置了Linux系统locale以后可以不用再单独设置charset。例如：
-```
+    在Linux系统中，locale信息包含了字符编码信息，因此正确设置了Linux系统locale以后可以不用再单独设置charset。例如：
+    ```
    locale zh_CN.UTF-8
-```
-在Windows系统中，无法从locale获取系统当前编码。如果无法从配置文件中读取字符串编码信息，taos默认设置为字符编码为CP936。其等效在配置文件中添加如下配置：
-```
+    ```
+    在Windows系统中，无法从locale获取系统当前编码。如果无法从配置文件中读取字符串编码信息，taos默认设置为字符编码为CP936。其等效在配置文件中添加如下配置：
+    ```
    charset CP936
-```
-如果需要调整字符编码，请查阅当前操作系统使用的编码，并在配置文件中正确设置。
+    ```
+    如果需要调整字符编码，请查阅当前操作系统使用的编码，并在配置文件中正确设置。

-在Linux系统中，如果用户同时设置了locale和字符集编码charset，并且locale和charset的不一致，后设置的值将覆盖前面设置的值。
-```
+    在Linux系统中，如果用户同时设置了locale和字符集编码charset，并且locale和charset的不一致，后设置的值将覆盖前面设置的值。
+    ```
    locale zh_CN.UTF-8
    charset GBK
-```
-则charset的有效值是GBK。
-```
+    ```
+    则charset的有效值是GBK。
+    ```
    charset GBK
    locale zh_CN.UTF-8
-```
-charset的有效值是UTF-8。
+    ```
+    charset的有效值是UTF-8。

-日志的配置参数，与server 的配置参数完全一样。
+    日志的配置参数，与server 的配置参数完全一样。

 - timezone

    默认值：从系统中动态获取当前的时区设置

-客户端运行系统所在的时区。为应对多时区的数据写入和查询问题，TDengine 采用 Unix 时间戳(Unix Timestamp)来记录和存储时间戳。Unix 时间戳的特点决定了任一时刻不论在任何时区，产生的时间戳均一致。需要注意的是，Unix时间戳是在客户端完成转换和记录。为了确保客户端其他形式的时间转换为正确的 Unix 时间戳，需要设置正确的时区。
+    客户端运行系统所在的时区。为应对多时区的数据写入和查询问题，TDengine 采用 Unix 时间戳(Unix Timestamp)来记录和存储时间戳。Unix 时间戳的特点决定了任一时刻不论在任何时区，产生的时间戳均一致。需要注意的是，Unix时间戳是在客户端完成转换和记录。为了确保客户端其他形式的时间转换为正确的 Unix 时间戳，需要设置正确的时区。

-在Linux系统中，客户端会自动读取系统设置的时区信息。用户也可以采用多种方式在配置文件设置时区。例如：
-```
+    在Linux系统中，客户端会自动读取系统设置的时区信息。用户也可以采用多种方式在配置文件设置时区。例如：
+    ```
    timezone UTC-8
    timezone GMT-8
    timezone Asia/Shanghai
-```
-均是合法的设置东八区时区的格式。
+    ```
+    均是合法的设置东八区时区的格式。

-时区的设置对于查询和写入SQL语句中非Unix时间戳的内容（时间戳字符串、关键词now的解析）产生影响。例如：
-```
+    时区的设置对于查询和写入SQL语句中非Unix时间戳的内容（时间戳字符串、关键词now的解析）产生影响。例如：
+    ```
    SELECT count(*) FROM table_name WHERE TS<'2019-04-11 12:01:08';
-```
-在东八区，SQL语句等效于
-```
+    ```
+    在东八区，SQL语句等效于
+    ```
    SELECT count(*) FROM table_name WHERE TS<1554955268000;
-```
-在UTC时区，SQL语句等效于
-```
+    ```
+    在UTC时区，SQL语句等效于
+    ```
    SELECT count(*) FROM table_name WHERE TS<1554984068000;
-```
-为了避免使用字符串时间格式带来的不确定性，也可以直接使用Unix时间戳。此外，还可以在SQL语句中使用带有时区的时间戳字符串，例如：RFC3339格式的时间戳字符串，2013-04-12T15:52:01.123+08:00或者ISO-8601格式时间戳字符串2013-04-12T15:52:01.123+0800。上述两个字符串转化为Unix时间戳不受系统所在时区的影响。
+    ```
+    为了避免使用字符串时间格式带来的不确定性，也可以直接使用Unix时间戳。此外，还可以在SQL语句中使用带有时区的时间戳字符串，例如：RFC3339格式的时间戳字符串，2013-04-12T15:52:01.123+08:00或者ISO-8601格式时间戳字符串2013-04-12T15:52:01.123+0800。上述两个字符串转化为Unix时间戳不受系统所在时区的影响。
+
+    启动taos时，也可以从命令行指定一个taosd实例的end point，否则就从taos.cfg读取。
+   
+- maxBinaryDisplayWidth
+
+    Shell中binary 和 nchar字段的显示宽度上限，超过此限制的部分将被隐藏。默认值：30。可在 shell 中通过命令 set max_binary_display_width nn 动态修改此选项。
   
-启动taos时，也可以从命令行指定一个taosd实例的end point，否则就从taos.cfg读取。

 ## 用户管理

@@ -408,5 +411,4 @@ TDengine的所有可执行文件默认存放在 _/usr/local/taos/bin_ 目录下

 您可以通过修改系统配置文件taos.cfg来配置不同的数据目录和日志目录。

-##

--- a/documentation20/webdocs/markdowndocs/architecture-ch.md
+++ b/documentation20/webdocs/markdowndocs/architecture-ch.md
@@ -65,24 +65,24 @@ TDengine 的设计是基于单个硬件、软件系统不可靠，基于任何
 TDengine 分布式架构的逻辑结构图如下：
 <center> <img src="../assets/structure.png"> </center>
 <center> 图 1 TDengine架构示意图  </center>
-一个完整的 TDengine 系统是运行在一到多个物理节点上的，逻辑上，它包含数据节点(dnode)、TDengine客户端(taosc)以及应用(app)。系统中存在一到多个数据节点，这些数据节点组成一个集群(cluster)。应用通过taosc的API与TDengine集群进行互动。下面对每个逻辑单元进行简要介绍。
+一个完整的 TDengine 系统是运行在一到多个物理节点上的，逻辑上，它包含数据节点(dnode)、TDengine应用驱动(taosc)以及应用(app)。系统中存在一到多个数据节点，这些数据节点组成一个集群(cluster)。应用通过taosc的API与TDengine集群进行互动。下面对每个逻辑单元进行简要介绍。

-**物理节点(pnode):** pnode是一独立运行、拥有自己的计算、存储和网络能力的计算机，可以是安装有OS的物理机、虚拟机或容器。物理节点由其配置的 FQDN(Fully Qualified Domain Name)来标识。TDengine完全依赖FQDN来进行网络通讯，如果不了解FQDN，请看博文《[一篇文章说清楚TDengine的FQDN](https://www.taosdata.com/blog/2020/09/11/1824.html)》。
+**物理节点(pnode):** pnode是一独立运行、拥有自己的计算、存储和网络能力的计算机，可以是安装有OS的物理机、虚拟机或Docker容器。物理节点由其配置的 FQDN(Fully Qualified Domain Name)来标识。TDengine完全依赖FQDN来进行网络通讯，如果不了解FQDN，请看博文《[一篇文章说清楚TDengine的FQDN](https://www.taosdata.com/blog/2020/09/11/1824.html)》。

 **数据节点(dnode):** dnode 是 TDengine 服务器侧执行代码 taosd 在物理节点上的一个运行实例，一个工作的系统必须有至少一个数据节点。dnode包含零到多个逻辑的虚拟节点(VNODE)，零或者至多一个逻辑的管理节点(mnode)。dnode在系统中的唯一标识由实例的End Point (EP )决定。EP是dnode所在物理节点的FQDN (Fully Qualified Domain Name)和系统所配置的网络端口号(Port)的组合。通过配置不同的端口，一个物理节点(一台物理机、虚拟机或容器）可以运行多个实例，或有多个数据节点。

-**虚拟节点(vnode)**: 为更好的支持数据分片、负载均衡，防止数据过热或倾斜，数据节点被虚拟化成多个虚拟节点(vnode，图中V2, V3, V4等)。每个 vnode 都是一个相对独立的工作单元，是时序数据存储的基本单元，具有独立的运行线程、内存空间与持久化存储的路径。一个 vnode 包含一定数量的表（数据采集点）。当创建一张新表时，系统会检查是否需要创建新的 vnode。一个数据节点上能创建的 vnode 的数量取决于该数据节点所在物理节点的硬件资源。一个 vnode 只属于一个DB，但一个DB可以有多个 vnode。一个 vnode 除存储的时序数据外，也保存有所包含的表的SCHEMA、标签值等。一个虚拟节点由所属的数据节点的EP，以及所属的VGroup ID在系统内唯一标识，由管理节点创建并管理。
+**虚拟节点(vnode)**: 为更好的支持数据分片、负载均衡，防止数据过热或倾斜，数据节点被虚拟化成多个虚拟节点(vnode，图中V2, V3, V4等)。每个 vnode 都是一个相对独立的工作单元，是时序数据存储的基本单元，具有独立的运行线程、内存空间与持久化存储的路径。一个 vnode 包含一定数量的表（数据采集点）。当创建一张新表时，系统会检查是否需要创建新的 vnode。一个数据节点上能创建的 vnode 的数量取决于该数据节点所在物理节点的硬件资源。一个 vnode 只属于一个DB，但一个DB可以有多个 vnode。一个 vnode 除存储的时序数据外，也保存有所包含的表的schema、标签值等。一个虚拟节点由所属的数据节点的EP，以及所属的VGroup ID在系统内唯一标识，由管理节点创建并管理。

 **管理节点(mnode):** 一个虚拟的逻辑单元，负责所有数据节点运行状态的监控和维护，以及节点之间的负载均衡(图中M)。同时，管理节点也负责元数据(包括用户、数据库、表、静态标签等)的存储和管理，因此也称为 Meta Node。TDengine 集群中可配置多个(最多不超过5个) mnode，它们自动构建成为一个虚拟管理节点组(图中M0, M1, M2)。mnode 间采用 master/slave 的机制进行管理，而且采取强一致方式进行数据同步, 任何数据更新操作只能在 Master 上进行。mnode 集群的创建由系统自动完成，无需人工干预。每个dnode上至多有一个mnode，由所属的数据节点的EP来唯一标识。每个dnode通过内部消息交互自动获取整个集群中所有 mnode 所在的 dnode 的EP。

 **虚拟节点组(VGroup):** 不同数据节点上的 vnode 可以组成一个虚拟节点组(vnode group)来保证系统的高可靠。虚拟节点组内采取master/slave的方式进行管理。写操作只能在 master vnode 上进行，系统采用异步复制的方式将数据同步到 slave vnode，这样确保了一份数据在多个物理节点上有拷贝。一个 vgroup 里虚拟节点个数就是数据的副本数。如果一个DB的副本数为N，系统必须有至少N个数据节点。副本数在创建DB时通过参数 replica 可以指定，缺省为1。使用 TDengine 的多副本特性，可以不再需要昂贵的磁盘阵列等存储设备，就可以获得同样的数据高可靠性。虚拟节点组由管理节点创建、管理，并且由管理节点分配一个系统唯一的ID，VGroup ID。如果两个虚拟节点的vnode group ID相同，说明他们属于同一个组，数据互为备份。虚拟节点组里虚拟节点的个数是可以动态改变的，容许只有一个，也就是没有数据复制。VGroup ID是永远不变的，即使一个虚拟节点组被删除，它的ID也不会被收回重复利用。

-**TAOSC:** taosc是TDengine给应用提供的驱动程序(driver)，负责处理应用与集群的接口交互，内嵌于JDBC、ODBC driver中，或者C、Python、Go语言连接库里。应用都是通过taosc而不是直接连接集群中的数据节点与整个集群进行交互的。这个模块负责获取并缓存元数据；将插入、查询等请求转发到正确的数据节点；在把结果返回给应用时，还需要负责最后一级的聚合、排序、过滤等操作。对于JDBC, ODBC, C/C++接口而言，这个模块是在应用所处的物理节点上运行，但消耗的资源很小。同时，为支持全分布式的RESTful接口，taosc在TDengine集群的每个dnode上都有一运行实例。
+**TAOSC:** taosc是TDengine给应用提供的驱动程序(driver)，负责处理应用与集群的接口交互，提供C/C++语言原生接口，内嵌于JDBC、C#、Python、Go、Node.js语言连接库里。应用都是通过taosc而不是直接连接集群中的数据节点与整个集群进行交互的。这个模块负责获取并缓存元数据；将插入、查询等请求转发到正确的数据节点；在把结果返回给应用时，还需要负责最后一级的聚合、排序、过滤等操作。对于JDBC, C/C++/C#/Python/Go/Node.js接口而言，这个模块是在应用所处的物理节点上运行。同时，为支持全分布式的RESTful接口，taosc在TDengine集群的每个dnode上都有一运行实例。

 ### 节点之间的通讯
-**通讯方式：**TDengine系统的各个节点之间的通讯是通过TCP/UDP进行的。因为考虑到物联网场景，数据写入的包一般不大，因此TDengine 除采用TCP做传输之外，还采用UDP方式，因为UDP 更加高效，而且不受连接数的限制。TDengine实现了自己的超时、重传、确认等机制，以确保UDP的可靠传输。对于数据量不到15K的数据包，采取UDP的方式进行传输，超过15K的，或者是查询类的操作，自动采取TCP的方式进行传输。同时，TDengine根据配置和数据包，会自动对数据进行压缩/解压缩，数字签名/认证等处理。对于数据节点之间的数据复制，只采用TCP方式进行数据传输。
+**通讯方式：**TDengine系统的各个数据节点之间，以及应用驱动与各数据节点之间的通讯是通过TCP/UDP进行的。因为考虑到物联网场景，数据写入的包一般不大，因此TDengine 除采用TCP做传输之外，还采用UDP方式，因为UDP 更加高效，而且不受连接数的限制。TDengine实现了自己的超时、重传、确认等机制，以确保UDP的可靠传输。对于数据量不到15K的数据包，采取UDP的方式进行传输，超过15K的，或者是查询类的操作，自动采取TCP的方式进行传输。同时，TDengine根据配置和数据包，会自动对数据进行压缩/解压缩，数字签名/认证等处理。对于数据节点之间的数据复制，只采用TCP方式进行数据传输。

-**FQDN配置**：一个数据节点有一个或多个FQDN，可以在系统配置文件taos.cfg通过参数“fqdn"进行指定，如果没有指定，系统将自动获取FQDN。如果节点没有配置FQDN，可以直接将该节点的配置参数fqdn设置为它的IP地址。但不建议使用IP，因为IP地址可变，一旦变化，将让集群无法正常工作。一个数据节点的EP(End Point)由FQDN + Port组成。采用FQDN，需要保证DNS服务正常工作，或者在节点以及应用所在的节点配置好hosts文件。
+**FQDN配置**：一个数据节点有一个或多个FQDN，可以在系统配置文件taos.cfg通过参数“fqdn"进行指定，如果没有指定，系统将自动获取计算机的hostname作为其FQDN。如果节点没有配置FQDN，可以直接将该节点的配置参数fqdn设置为它的IP地址。但不建议使用IP，因为IP地址可变，一旦变化，将让集群无法正常工作。一个数据节点的EP(End Point)由FQDN + Port组成。采用FQDN，需要保证DNS服务正常工作，或者在节点以及应用所在的节点配置好hosts文件。

 **端口配置：**一个数据节点对外的端口由TDengine的系统配置参数serverPort决定，对集群内部通讯的端口是serverPort+5。集群内数据节点之间的数据复制操作还占有一个TCP端口，是serverPort+10. 为支持多线程高效的处理UDP数据，每个对内和对外的UDP连接，都需要占用5个连续的端口。因此一个数据节点总的端口范围为serverPort到serverPort + 10，总共11个TCP/UDP端口。使用时，需要确保防火墙将这些端口打开。每个数据节点可以配置不同的serverPort。

@@ -153,6 +153,7 @@ TDengine除vnode分片之外，还对时序数据按照时间段进行分区。
 当新的数据节点被添加进集群，因为新的计算和存储被添加进来，系统也将自动启动负载均衡流程。

 负载均衡过程无需任何人工干预，应用也无需重启，将自动连接新的节点，完全透明。
+**提示：负载均衡由参数balance控制，决定开启/关闭自动负载均衡。**

 ## 数据写入与复制流程
 如果一个数据库有N个副本，那一个虚拟节点组就有N个虚拟节点，但是只有一个是Master，其他都是slave。当应用将新的记录写入系统时，只有Master vnode能接受写的请求。如果slave vnode收到写的请求，系统将通知taosc需要重新定向。
@@ -192,7 +193,8 @@ Master Vnode遵循下面的写入流程：

 理论上，只要是异步复制，就无法保证100%不丢失。但是这个窗口极小，mater与slave要同时发生故障，而且发生在刚给应用确认写入成功之后。

-注：异地容灾、IDC无中断迁移，仅仅企业版支持
+注：异地容灾、IDC无中断迁移，仅仅企业版支持。
+**提示：该功能暂未提供**

 ### 主从选择
 Vnode会保持一个数据版本号(Version)，对内存数据进行持久化存储时，对该版本号也进行持久化存储。每个数据更新操作，无论是采集的时序数据还是元数据，这个版本号将增一。
@@ -259,6 +261,7 @@ dataDir /mnt/disk6/taos 2
 挂载的盘也可以是非本地的网络盘，只要系统能访问即可。

 注：多级存储功能仅企业版支持
+**提示：该功能暂未提供**

 ## 数据查询
 TDengine提供了多种多样针对表和超级表的查询处理功能，除了常规的聚合查询之外，还提供针对时序数据的窗口查询、统计聚合等功能。TDengine的查询处理需要客户端、vnode, mnode节点协同完成。
@@ -289,11 +292,18 @@ select count(*) from d1001 interval(1h) fill(prev);
 针对d1001设备采集数据统计每小时记录数，如果某一个小时不存在数据，这返回之前一个小时的统计数据。TDengine提供前向插值(prev)、线性插值(linear)、NULL值填充(NULL)、特定值填充(value)。

 ### 多表聚合查询
-多表聚合查询与单表查询的整体流程相同，但是存在如下的差异：
-
- 由于多表可能分布在不同的节点(dnode)，因此多表的聚合查询需要首先获得表所在的全部数据节点的信息，并且同时向相关的dnode发出查询请求。
- 每个vnode的计算获得的中间结果(partial results)需要进行第二阶段的聚合才能形成最终结果，第二阶段的聚合过程在客户端完成。
- 由于表标签信息存储在vnode中，因此针对标签信息的查询也需要vnode完成。客户端将标签的过滤表达式封装在查询请求结构体中发送给vnode，由vnode的查询执行线程从中抽取出标签查询条件，然后执行查询。标签查询与过滤是在针对表的查询之前完成。标签查询完成以后，将符合条件的表纳入到接下来的查询处理流程中。
+TDengine对每个数据采集点单独建表，但在实际应用中经常需要对不同的采集点数据进行聚合。为高效的进行聚合操作，TDengine引入超级表（STable）的概念。超级表用来代表一特定类型的数据采集点，它是包含多张表的表集合，集合里每张表的模式（schema）完全一致，但每张表都带有自己的静态标签，标签可以多个，可以随时增加、删除和修改。 应用可通过指定标签的过滤条件，对一个STable下的全部或部分表进行聚合或统计操作，这样大大简化应用的开发。其具体流程如下图所示：
+<center> <img src="../assets/multi_tables.png">  </center>
+
+<center> 图 5 多表聚合查询原理图  </center>
+1：应用将一个查询条件发往系统；
+2: taosc将超级表的名字发往 Meta Node（管理节点)；
+3：管理节点将超级表所拥有的 vnode 列表发回 taosc；
+4：taosc将计算的请求连同标签过滤条件发往这些vnode对应的多个数据节点；
+5：每个vnode先在内存里查找出自己节点里符合标签过滤条件的表的集合，然后扫描存储的时序数据，完成相应的聚合计算，将结果返回给taosc；
+6：taosc将多个数据节点返回的结果做最后的聚合，将其返回给应用。 
+
+由于TDengine在vnode内将标签数据与时序数据分离存储，通过在内存里过滤标签数据，先找到需要参与聚合操作的表的集合，将需要扫描的数据集大幅减少，大幅提升聚合计算速度。同时，由于数据分布在多个vnode/dnode，聚合计算操作在多个vnode里并发进行，又进一步提升了聚合的速度。 对普通表的聚合函数以及绝大部分操作都适用于超级表，语法完全一样，细节请看 TAOS SQL。

 ### 预计算
 为有效提升查询处理的性能，针对物联网数据的不可更改的特点，在数据块头部记录该数据块中存储数据的统计信息：包括最大值、最小值、和。我们称之为预计算单元。如果查询处理涉及整个数据块的全部数据，直接使用预计算结果，完全不需要读取数据块的内容。由于预计算数据量远小于磁盘上存储的数据块数据的大小，对于磁盘IO为瓶颈的查询处理，使用预计算结果可以极大地减小读取IO压力，加速查询处理的流程。预计算机制与Postgre SQL的索引BRIN（block range index）有异曲同工之妙。
--- a/documentation20/webdocs/markdowndocs/cluster-ch.md
+++ b/documentation20/webdocs/markdowndocs/cluster-ch.md
@@ -8,7 +8,7 @@ TDengine的集群管理极其简单，除添加和删除节点需要人工干预

 ## 准备工作

-**第零步**：如果没有部署DNS服务，请规划集群所有物理节点的FQDN，然后按照《[一篇文章说清楚TDengine的FQDN](https://www.taosdata.com/blog/2020/09/11/1824.html)》里的步骤，将所有集群物理节点的IP与FQDN的对应关系添加好。
+**第零步**：规划集群所有物理节点的FQDN，将规划好的FQDN分别添加到每个物理节点的/etc/hostname；修改每个物理节点的/etc/hosts，将所有集群物理节点的IP与FQDN的对应添加好。【如部署了DNS，请联系网络管理员在DNS上做好相关配置】

 **第一步**：如果搭建集群的物理节点中，存有之前的测试数据、装过1.X的版本，或者装过其他版本的TDengine，请先将其删除，并清空所有数据，具体步骤请参考博客[《TDengine多种安装包的安装和卸载》](https://www.taosdata.com/blog/2019/08/09/566.html ) 
 **注意1：**因为FQDN的信息会写进文件，如果之前没有配置或者更改FQDN，且启动了TDengine。请一定在确保数据无用或者备份的前提下，清理一下之前的数据（rm -rf /var/lib/taos/）；
@@ -16,9 +16,9 @@ TDengine的集群管理极其简单，除添加和删除节点需要人工干预

 **第二步**：建议关闭所有物理节点的防火墙，至少保证端口：6030 - 6042的TCP和UDP端口都是开放的。**强烈建议**先关闭防火墙，集群搭建完毕之后，再来配置端口；

-**第三步**：在所有节点安装TDengine，且版本必须是一致的，**但不要启动taosd**。安装时，提示输入是否要加入一个已经存在的TDengine集群时，第一个物理节点直接回车创建新集群，后续物理节点则输入该集群任何一个在线的物理节点的FQDN:端口号(默认6030)；
+**第三步**：在所有物理节点安装TDengine，且版本必须是一致的，**但不要启动taosd**。安装时，提示输入是否要加入一个已经存在的TDengine集群时，第一个物理节点直接回车创建新集群，后续物理节点则输入该集群任何一个在线的物理节点的FQDN:端口号(默认6030)；

-**第四步**：检查所有数据节点，以及应用所在物理节点的网络设置：
+**第四步**：检查所有数据节点，以及应用程序所在物理节点的网络设置：

 1. 每个物理节点上执行命令`hostname -f`，查看和确认所有节点的hostname是不相同的(应用驱动所在节点无需做此项检查)；
 2. 每个物理节点上执行`ping host`, 其中host是其他物理节点的hostname, 看能否ping通其它物理节点; 如果不能ping通，需要检查网络设置, 或/etc/hosts文件(Windows系统默认路径为C:\Windows\system32\drivers\etc\hosts)，或DNS的配置。如果无法ping通，是无法组成集群的；

--- a/documentation20/webdocs/markdowndocs/insert-ch.md
+++ b/documentation20/webdocs/markdowndocs/insert-ch.md
@@ -22,7 +22,7 @@ INSERT INTO d1001 VALUES (1538548685000, 10.3, 219, 0.31) (1538548695000, 12.6,

 **Tips:** 

- 要提高写入效率，需要批量写入。一批写入的记录条数越多，插入效率就越高。但一条记录不能超过16K，一条SQL语句总长度不能超过64K（可通过参数maxSQLLength配置，最大可配置为8M）。
+- 要提高写入效率，需要批量写入。一批写入的记录条数越多，插入效率就越高。但一条记录不能超过16K，一条SQL语句总长度不能超过64K（可通过参数maxSQLLength配置，最大可配置为1M）。
 - TDengine支持多线程同时写入，要进一步提高写入速度，一个客户端需要打开20个以上的线程同时写。但线程数达到一定数量后，无法再提高，甚至还会下降，因为线程切频繁切换，带来额外开销。
 - 对同一张表，如果新插入记录的时间戳已经存在，新记录将被直接抛弃，也就是说，在一张表里，时间戳必须是唯一的。如果应用自动生成记录，很有可能生成的时间戳是一样的，这样，成功插入的记录条数会小于应用插入的记录条数。
 - 写入的数据的时间戳必须大于当前时间减去配置参数keep的时间。如果keep配置为3650天，那么无法写入比3650天还老的数据。写入数据的时间戳也不能大于当前时间加配置参数days。如果days配置为2，那么无法写入比当前时间还晚2天的数据。

--- a/packaging/docker/Dockerfile
+++ b/packaging/docker/Dockerfile
@@ -2,9 +2,11 @@ FROM centos:7

 WORKDIR /root

+ARG version
+RUN echo $version
 COPY tdengine.tar.gz /root/
 RUN tar -zxf tdengine.tar.gz
-WORKDIR /root/TDengine-server/
+WORKDIR /root/TDengine-server-$version/
 RUN sh install.sh -e no



--- a/packaging/docker/dockerbuild.sh
+++ b/packaging/docker/dockerbuild.sh
 #!/bin/bash
 set -x
-docker build --rm -f "Dockerfile" -t tdengine/tdengine:$1 "."
+docker build --rm -f "Dockerfile" -t tdengine/tdengine:$1 "." --build-arg version=$1
 docker login -u tdengine -p $2  #replace the docker registry username and password
 docker push tdengine/tdengine:$1
--- a/packaging/tools/install.sh
+++ b/packaging/tools/install.sh
@@ -272,6 +272,29 @@ function install_config() {
            break
        fi
    done	
+
+    # Optional user email: prompt once and, if the answer is non-empty, write it
+    # to ${install_main_dir}/email. No format validation is performed.
+    echo
+    echo -e -n "${GREEN}Enter your email address for priority support or enter empty to skip${NC}: "
+    # -r keeps backslashes in the typed address literal.
+    read -r emailAddr
+    if [ -n "$emailAddr" ]; then
+        email_file="${install_main_dir}/email"
+        # Pipe the value into tee instead of interpolating it into a `bash -c`
+        # command string, so shell metacharacters in the input are written
+        # literally and are never executed (possibly under ${csudo}).
+        printf '%s\n' "$emailAddr" | ${csudo} tee "${email_file}" > /dev/null
+    fi
 }



--- a/src/balance/src/balance.c
+++ b/src/balance/src/balance.c
@@ -957,11 +957,11 @@ static void balanceMonitorDnodeModule() {
      continue;
    }

-    mLInfo("dnode:%d, numOfMnodes:%d expect:%d, add mnode in this dnode", pDnode->dnodeId, numOfMnodes, tsNumOfMnodes);
-    mnodeAddMnode(pDnode->dnodeId);
+    mLInfo("dnode:%d, numOfMnodes:%d expect:%d, create mnode in this dnode", pDnode->dnodeId, numOfMnodes, tsNumOfMnodes);
+    mnodeCreateMnode(pDnode->dnodeId, pDnode->dnodeEp, true);
    
-    numOfMnodes = mnodeGetMnodesNum();
-    if (numOfMnodes >= tsNumOfMnodes) return;
+    // Only create one mnode each time
+    return;
  }
 }


--- a/src/client/src/tscSql.c
+++ b/src/client/src/tscSql.c
@@ -20,6 +20,7 @@
 #include "tcache.h"
 #include "tnote.h"
 #include "trpc.h"
+#include "ttimer.h"
 #include "tscLog.h"
 #include "tscSubquery.h"
 #include "tscUtil.h"
@@ -260,6 +261,9 @@ void taos_close(TAOS *taos) {
    return;
  }

+  pObj->signature = NULL;
+  taosTmrStopA(&(pObj->pTimer));
+
  SSqlObj* pHb = pObj->pHb;
  if (pHb != NULL && atomic_val_compare_exchange_ptr(&pObj->pHb, pHb, 0) == pHb) {
    if (pHb->pRpcCtx != NULL) {  // wait for rsp from dnode
@@ -698,8 +702,10 @@ void taos_stop_query(TAOS_RES *res) {
    tscKillSTableQuery(pSql);
  } else {
    if (pSql->cmd.command < TSDB_SQL_LOCAL) {
-      assert(pSql->pRpcCtx != NULL);
+      if (pSql->pRpcCtx != NULL) {
        rpcCancelRequest(pSql->pRpcCtx);
+        pSql->pRpcCtx = NULL;
+      }
    }
  }


--- a/src/common/inc/tglobal.h
+++ b/src/common/inc/tglobal.h
@@ -34,6 +34,7 @@ extern int32_t  tsStatusInterval;
 extern int32_t  tsNumOfMnodes;
 extern int32_t  tsEnableVnodeBak;
 extern int32_t  tsEnableTelemetryReporting;
+extern char     tsEmail[];

 // common
 extern int      tsRpcTimer;

--- a/src/common/src/tglobal.c
+++ b/src/common/src/tglobal.c
@@ -42,6 +42,7 @@ int32_t  tsStatusInterval = 1;  // second
 int32_t  tsNumOfMnodes = 3;
 int32_t  tsEnableVnodeBak = 1;
 int32_t  tsEnableTelemetryReporting = 1;
+char     tsEmail[TSDB_FQDN_LEN] = {0};

 // common
 int32_t tsRpcTimer = 1000;

--- a/go @ 8d7bf743
+++ b/go @ 8d7bf743
-Subproject commit 8c58c512b6acda8bcdfa48fdc7140227b5221766
+Subproject commit 8d7bf743852897110cbdcc7c4322cd7a74d4167b
--- a/src/dnode/src/dnodeMPeer.c
+++ b/src/dnode/src/dnodeMPeer.c
@@ -33,7 +33,8 @@ typedef struct {
 } SMPeerWorker;

 typedef struct {
-  int32_t       num;
+  int32_t curNum;
+  int32_t maxNum;
  SMPeerWorker *peerWorker;
 } SMPeerWorkerPool;

@@ -46,37 +47,44 @@ static void *dnodeProcessMnodePeerQueue(void *param);
 int32_t dnodeInitMnodePeer() {
  tsMPeerQset = taosOpenQset();
  
-  tsMPeerPool.num = 1;
-  tsMPeerPool.peerWorker = (SMPeerWorker *)calloc(sizeof(SMPeerWorker), tsMPeerPool.num);
+  tsMPeerPool.maxNum = 1;
+  tsMPeerPool.curNum = 0;
+  tsMPeerPool.peerWorker = (SMPeerWorker *)calloc(sizeof(SMPeerWorker), tsMPeerPool.maxNum);

  if (tsMPeerPool.peerWorker == NULL) return -1;
-  for (int32_t i = 0; i < tsMPeerPool.num; ++i) {
+  for (int32_t i = 0; i < tsMPeerPool.maxNum; ++i) {
    SMPeerWorker *pWorker = tsMPeerPool.peerWorker + i;
    pWorker->workerId = i;
+    dDebug("dnode mpeer worker:%d is created", i);
  }

-  dInfo("dnode mpeer is opened");
+  dDebug("dnode mpeer is opened, workers:%d qset:%p", tsMPeerPool.maxNum, tsMPeerQset);
  return 0;
 }

 void dnodeCleanupMnodePeer() {
-  for (int32_t i = 0; i < tsMPeerPool.num; ++i) {
+  for (int32_t i = 0; i < tsMPeerPool.maxNum; ++i) {
    SMPeerWorker *pWorker = tsMPeerPool.peerWorker + i;
    if (pWorker->thread) {
      taosQsetThreadResume(tsMPeerQset);
    }
+    dDebug("dnode mpeer worker:%d is closed", i);
  }

-  for (int32_t i = 0; i < tsMPeerPool.num; ++i) {
+  for (int32_t i = 0; i < tsMPeerPool.maxNum; ++i) {
    SMPeerWorker *pWorker = tsMPeerPool.peerWorker + i;
+    dDebug("dnode mpeer worker:%d start to join", i);
    if (pWorker->thread) {
      pthread_join(pWorker->thread, NULL);
    }
+    dDebug("dnode mpeer worker:%d join success", i);
  }

+  dDebug("dnode mpeer is closed, qset:%p", tsMPeerQset);
+
  taosCloseQset(tsMPeerQset);
+  tsMPeerQset = NULL;
  taosTFree(tsMPeerPool.peerWorker);
-  dInfo("dnode mpeer is closed");
 }

 int32_t dnodeAllocateMnodePqueue() {
@@ -85,7 +93,7 @@ int32_t dnodeAllocateMnodePqueue() {

  taosAddIntoQset(tsMPeerQset, tsMPeerQueue, NULL);

-  for (int32_t i = 0; i < tsMPeerPool.num; ++i) {
+  for (int32_t i = tsMPeerPool.curNum; i < tsMPeerPool.maxNum; ++i) {
    SMPeerWorker *pWorker = tsMPeerPool.peerWorker + i;
    pWorker->workerId = i;

@@ -98,7 +106,9 @@ int32_t dnodeAllocateMnodePqueue() {
    }

    pthread_attr_destroy(&thAttr);
-    dDebug("dnode mpeer worker:%d is launched, total:%d", pWorker->workerId, tsMPeerPool.num);
+
+    tsMPeerPool.curNum = i + 1;
+    dDebug("dnode mpeer worker:%d is launched, total:%d", pWorker->workerId, tsMPeerPool.maxNum);
  }

  dDebug("dnode mpeer queue:%p is allocated", tsMPeerQueue);
@@ -106,6 +116,7 @@ int32_t dnodeAllocateMnodePqueue() {
 }

 void dnodeFreeMnodePqueue() {
+  dDebug("dnode mpeer queue:%p is freed", tsMPeerQueue);
  taosCloseQueue(tsMPeerQueue);
  tsMPeerQueue = NULL;
 }
@@ -148,7 +159,7 @@ static void *dnodeProcessMnodePeerQueue(void *param) {
  
  while (1) {
    if (taosReadQitemFromQset(tsMPeerQset, &type, (void **)&pPeerMsg, &unUsed) == 0) {
-      dDebug("dnodeProcessMnodePeerQueue: got no message from qset, exiting...");
+      dDebug("qset:%p, mnode peer got no message from qset, exiting", tsMPeerQset);
      break;
    }


--- a/src/dnode/src/dnodeMRead.c
+++ b/src/dnode/src/dnodeMRead.c
@@ -33,7 +33,8 @@ typedef struct {
 } SMReadWorker;

 typedef struct {
-  int32_t       num;
+  int32_t curNum;
+  int32_t maxNum;
  SMReadWorker *readWorker;
 } SMReadWorkerPool;

@@ -46,40 +47,46 @@ static void *dnodeProcessMnodeReadQueue(void *param);
 int32_t dnodeInitMnodeRead() {
  tsMReadQset = taosOpenQset();

-  tsMReadPool.num = tsNumOfCores * tsNumOfThreadsPerCore / 2;
-  tsMReadPool.num = MAX(2, tsMReadPool.num);
-  tsMReadPool.num = MIN(4, tsMReadPool.num);
-  tsMReadPool.readWorker = (SMReadWorker *)calloc(sizeof(SMReadWorker), tsMReadPool.num);
+  tsMReadPool.maxNum = tsNumOfCores * tsNumOfThreadsPerCore / 2;
+  tsMReadPool.maxNum = MAX(2, tsMReadPool.maxNum);
+  tsMReadPool.maxNum = MIN(4, tsMReadPool.maxNum);
+  tsMReadPool.curNum = 0;
+  tsMReadPool.readWorker = (SMReadWorker *)calloc(sizeof(SMReadWorker), tsMReadPool.maxNum);

  if (tsMReadPool.readWorker == NULL) return -1;
-  for (int32_t i = 0; i < tsMReadPool.num; ++i) {
+  for (int32_t i = 0; i < tsMReadPool.maxNum; ++i) {
    SMReadWorker *pWorker = tsMReadPool.readWorker + i;
    pWorker->workerId = i;
+    dDebug("dnode mread worker:%d is created", i);
  }

-  dInfo("dnode mread is opened");
+  dDebug("dnode mread is opened, workers:%d qset:%p", tsMReadPool.maxNum, tsMReadQset);
  return 0;
 }

 void dnodeCleanupMnodeRead() {
-  for (int32_t i = 0; i < tsMReadPool.num; ++i) {
+  for (int32_t i = 0; i < tsMReadPool.maxNum; ++i) {
    SMReadWorker *pWorker = tsMReadPool.readWorker + i;
    if (pWorker->thread) {
      taosQsetThreadResume(tsMReadQset);
    }
+    dDebug("dnode mread worker:%d is closed", i);
  }

-  for (int32_t i = 0; i < tsMReadPool.num; ++i) {
+  for (int32_t i = 0; i < tsMReadPool.maxNum; ++i) {
    SMReadWorker *pWorker = tsMReadPool.readWorker + i;
+    dDebug("dnode mread worker:%d start to join", i);
    if (pWorker->thread) {
      pthread_join(pWorker->thread, NULL);
    }
+    dDebug("dnode mread worker:%d start to join", i);
  }

+  dDebug("dnode mread is closed, qset:%p", tsMReadQset);
+
  taosCloseQset(tsMReadQset);
+  tsMReadQset = NULL;
  free(tsMReadPool.readWorker);
-
-  dInfo("dnode mread is closed");
 }

 int32_t dnodeAllocateMnodeRqueue() {
@@ -88,7 +95,7 @@ int32_t dnodeAllocateMnodeRqueue() {

  taosAddIntoQset(tsMReadQset, tsMReadQueue, NULL);

-  for (int32_t i = 0; i < tsMReadPool.num; ++i) {
+  for (int32_t i = tsMReadPool.curNum; i < tsMReadPool.maxNum; ++i) {
    SMReadWorker *pWorker = tsMReadPool.readWorker + i;
    pWorker->workerId = i;

@@ -101,7 +108,8 @@ int32_t dnodeAllocateMnodeRqueue() {
    }

    pthread_attr_destroy(&thAttr);
-    dDebug("dnode mread worker:%d is launched, total:%d", pWorker->workerId, tsMReadPool.num);
+    tsMReadPool.curNum = i + 1;
+    dDebug("dnode mread worker:%d is launched, total:%d", pWorker->workerId, tsMReadPool.maxNum);
  }

  dDebug("dnode mread queue:%p is allocated", tsMReadQueue);
@@ -109,6 +117,7 @@ int32_t dnodeAllocateMnodeRqueue() {
 }

 void dnodeFreeMnodeRqueue() {
+  dDebug("dnode mread queue:%p is freed", tsMReadQueue);
  taosCloseQueue(tsMReadQueue);
  tsMReadQueue = NULL;
 }
@@ -156,7 +165,7 @@ static void *dnodeProcessMnodeReadQueue(void *param) {
  
  while (1) {
    if (taosReadQitemFromQset(tsMReadQset, &type, (void **)&pReadMsg, &unUsed) == 0) {
-      dDebug("dnodeProcessMnodeReadQueue: got no message from qset, exiting...");
+      dDebug("qset:%p, mnode read got no message from qset, exiting", tsMReadQset);
      break;
    }


--- a/src/dnode/src/dnodeMWrite.c
+++ b/src/dnode/src/dnodeMWrite.c
@@ -34,7 +34,8 @@ typedef struct {
 } SMWriteWorker;

 typedef struct {
-  int32_t        num;
+  int32_t curNum;
+  int32_t maxNum;
  SMWriteWorker *writeWorker;
 } SMWriteWorkerPool;

@@ -48,37 +49,44 @@ static void *dnodeProcessMnodeWriteQueue(void *param);
 int32_t dnodeInitMnodeWrite() {
  tsMWriteQset = taosOpenQset();

-  tsMWritePool.num = 1;
-  tsMWritePool.writeWorker = (SMWriteWorker *)calloc(sizeof(SMWriteWorker), tsMWritePool.num);
+  tsMWritePool.maxNum = 1;  // number of worker slots allocated below
+  tsMWritePool.curNum = 0;  // no worker launched yet; advanced in dnodeAllocateMnodeWqueue as threads start
+  tsMWritePool.writeWorker = (SMWriteWorker *)calloc(sizeof(SMWriteWorker), tsMWritePool.maxNum);

  if (tsMWritePool.writeWorker == NULL) return -1;
-  for (int32_t i = 0; i < tsMWritePool.num; ++i) {
+  for (int32_t i = 0; i < tsMWritePool.maxNum; ++i) {  // only worker ids are assigned here; no thread is created in this loop
    SMWriteWorker *pWorker = tsMWritePool.writeWorker + i;
    pWorker->workerId = i;
+    dDebug("dnode mwrite worker:%d is created", i);
  }

-  dInfo("dnode mwrite is opened");
+  dDebug("dnode mwrite is opened, workers:%d qset:%p", tsMWritePool.maxNum, tsMWriteQset);
  return 0;
 }

 void dnodeCleanupMnodeWrite() {
-  for (int32_t i = 0; i < tsMWritePool.num; ++i) {
+  for (int32_t i = 0; i < tsMWritePool.maxNum; ++i) {  // pass 1: one taosQsetThreadResume per worker that has a thread
    SMWriteWorker *pWorker = tsMWritePool.writeWorker + i;
    if (pWorker->thread) {
      taosQsetThreadResume(tsMWriteQset);
    }
+    dDebug("dnode mwrite worker:%d is closed", i);
  }

-  for (int32_t i = 0; i < tsMWritePool.num; ++i) {
+  for (int32_t i = 0; i < tsMWritePool.maxNum; ++i) {  // pass 2: join every worker thread that was started
    SMWriteWorker *pWorker = tsMWritePool.writeWorker + i;
+    dDebug("dnode mwrite worker:%d start to join", i);
    if (pWorker->thread) {
      pthread_join(pWorker->thread, NULL);
    }
+    dDebug("dnode mwrite worker:%d join success", i);
  }

+  dDebug("dnode mwrite is closed, qset:%p", tsMWriteQset);  // printed while the qset pointer is still set
+
  taosCloseQset(tsMWriteQset);
+  tsMWriteQset = NULL;  // reset the global handle once the qset has been closed
  taosTFree(tsMWritePool.writeWorker);
-  dInfo("dnode mwrite is closed");
 }

 int32_t dnodeAllocateMnodeWqueue() {
@@ -87,7 +95,7 @@ int32_t dnodeAllocateMnodeWqueue() {

  taosAddIntoQset(tsMWriteQset, tsMWriteQueue, NULL);

-  for (int32_t i = 0; i < tsMWritePool.num; ++i) {
+  for (int32_t i = tsMWritePool.curNum; i < tsMWritePool.maxNum; ++i) {
    SMWriteWorker *pWorker = tsMWritePool.writeWorker + i;
    pWorker->workerId = i;

@@ -100,7 +108,8 @@ int32_t dnodeAllocateMnodeWqueue() {
    }

    pthread_attr_destroy(&thAttr);
-    dDebug("dnode mwrite worker:%d is launched, total:%d", pWorker->workerId, tsMWritePool.num);
+    tsMWritePool.curNum = i + 1;
+    dDebug("dnode mwrite worker:%d is launched, total:%d", pWorker->workerId, tsMWritePool.maxNum);
  }

  dDebug("dnode mwrite queue:%p is allocated", tsMWriteQueue);
@@ -108,6 +117,7 @@ int32_t dnodeAllocateMnodeWqueue() {
 }

 void dnodeFreeMnodeWqueue() {
+  dDebug("dnode mwrite queue:%p is freed", tsMWriteQueue);  // logged first, before the handle is closed and reset to NULL
  taosCloseQueue(tsMWriteQueue);
  tsMWriteQueue = NULL;
 }
@@ -122,11 +132,15 @@ void dnodeDispatchToMnodeWriteQueue(SRpcMsg *pMsg) {
  SMnodeMsg *pWrite = (SMnodeMsg *)taosAllocateQitem(sizeof(SMnodeMsg));
  mnodeCreateMsg(pWrite, pMsg);

-  dDebug("app:%p:%p, msg:%s is put into mwrite queue", pWrite->rpcMsg.ahandle, pWrite, taosMsg[pWrite->rpcMsg.msgType]);
+  dDebug("app:%p:%p, msg:%s is put into mwrite queue:%p", pWrite->rpcMsg.ahandle, pWrite,
+         taosMsg[pWrite->rpcMsg.msgType], tsMWriteQueue);
  taosWriteQitem(tsMWriteQueue, TAOS_QTYPE_RPC, pWrite);
 }

 static void dnodeFreeMnodeWriteMsg(SMnodeMsg *pWrite) {
+  dDebug("app:%p:%p, msg:%s is freed from mwrite queue:%p", pWrite->rpcMsg.ahandle, pWrite,
+         taosMsg[pWrite->rpcMsg.msgType], tsMWriteQueue);  // reads pWrite fields, so it runs before the cleanup calls below
+
  mnodeCleanupMsg(pWrite);
  taosFreeQitem(pWrite);
 }
@@ -158,7 +172,7 @@ static void *dnodeProcessMnodeWriteQueue(void *param) {
  
  while (1) {
    if (taosReadQitemFromQset(tsMWriteQset, &type, (void **)&pWrite, &unUsed) == 0) {
-      dDebug("dnodeProcessMnodeWriteQueue: got no message from qset, exiting...");
+      dDebug("qset:%p, mnode write got no message from qset, exiting", tsMWriteQset);
      break;
    }

@@ -182,8 +196,8 @@ void dnodeReprocessMnodeWriteMsg(void *pMsg) {
    dnodeSendRedirectMsg(pMsg, true);
    dnodeFreeMnodeWriteMsg(pWrite);
  } else {
-    dDebug("app:%p:%p, msg:%s is reput into mwrite queue, retry times:%d", pWrite->rpcMsg.ahandle, pWrite,
-           taosMsg[pWrite->rpcMsg.msgType], pWrite->retry);
+    dDebug("app:%p:%p, msg:%s is reput into mwrite queue:%p, retry times:%d", pWrite->rpcMsg.ahandle, pWrite,
+           taosMsg[pWrite->rpcMsg.msgType], tsMWriteQueue, pWrite->retry);

    taosWriteQitem(tsMWriteQueue, TAOS_QTYPE_RPC, pWrite);
  }

--- a/src/dnode/src/dnodeMgmt.c
+++ b/src/dnode/src/dnodeMgmt.c
@@ -74,6 +74,7 @@ static int32_t  dnodeProcessAlterVnodeMsg(SRpcMsg *pMsg);
 static int32_t  dnodeProcessDropVnodeMsg(SRpcMsg *pMsg);
 static int32_t  dnodeProcessAlterStreamMsg(SRpcMsg *pMsg);
 static int32_t  dnodeProcessConfigDnodeMsg(SRpcMsg *pMsg);
+static int32_t dnodeProcessCreateMnodeMsg(SRpcMsg *pMsg);
 static int32_t (*dnodeProcessMgmtMsgFp[TSDB_MSG_TYPE_MAX])(SRpcMsg *pMsg);

 int32_t dnodeInitMgmt() {
@@ -82,6 +83,7 @@ int32_t dnodeInitMgmt() {
  dnodeProcessMgmtMsgFp[TSDB_MSG_TYPE_MD_DROP_VNODE]   = dnodeProcessDropVnodeMsg;
  dnodeProcessMgmtMsgFp[TSDB_MSG_TYPE_MD_ALTER_STREAM] = dnodeProcessAlterStreamMsg;
  dnodeProcessMgmtMsgFp[TSDB_MSG_TYPE_MD_CONFIG_DNODE] = dnodeProcessConfigDnodeMsg;
+  dnodeProcessMgmtMsgFp[TSDB_MSG_TYPE_MD_CREATE_MNODE] = dnodeProcessCreateMnodeMsg;

  dnodeAddClientRspHandle(TSDB_MSG_TYPE_DM_STATUS_RSP,  dnodeProcessStatusRsp);
  dnodeReadDnodeCfg();
@@ -226,7 +228,7 @@ static void *dnodeProcessMgmtQueue(void *param) {

  while (1) {
    if (taosReadQitemFromQset(tsMgmtQset, &type, (void **) &pMsg, &handle) == 0) {
-      dDebug("dnode mgmt got no message from qset, exit ...");
+      dDebug("qset:%p, dnode mgmt got no message from qset, exit", tsMgmtQset);
      break;
    }

@@ -451,10 +453,34 @@ static int32_t dnodeProcessAlterStreamMsg(SRpcMsg *pMsg) {
 }

 static int32_t dnodeProcessConfigDnodeMsg(SRpcMsg *pMsg) {
-  SMDCfgDnodeMsg *pCfg = (SMDCfgDnodeMsg *)pMsg->pCont;
+  SMDCfgDnodeMsg *pCfg = pMsg->pCont;  // explicit cast dropped; presumably pCont is void* so the conversion is implicit — TODO confirm
  return taosCfgDynamicOptions(pCfg->config);
 }

+static int32_t dnodeProcessCreateMnodeMsg(SRpcMsg *pMsg) {  // handler registered for TSDB_MSG_TYPE_MD_CREATE_MNODE
+  SMDCreateMnodeMsg *pCfg = pMsg->pCont;
+  pCfg->dnodeId = ntohl(pCfg->dnodeId);  // sender fills this field with htonl; converted in place to host order
+  if (pCfg->dnodeId != dnodeGetDnodeId()) {  // refuse a request whose dnode id is not this dnode's id
+    dError("dnodeId:%d, in create mnode msg is not equal with saved dnodeId:%d", pCfg->dnodeId, dnodeGetDnodeId());
+    return TSDB_CODE_MND_DNODE_ID_NOT_CONFIGURED;
+  }
+
+  if (strcmp(pCfg->dnodeEp, tsLocalEp) != 0) {  // the endpoint in the message must also equal tsLocalEp
+    dError("dnodeEp:%s, in create mnode msg is not equal with saved dnodeEp:%s", pCfg->dnodeEp, tsLocalEp);
+    return TSDB_CODE_MND_DNODE_EP_NOT_CONFIGURED;
+  }
+
+  dDebug("dnodeId:%d, create mnode msg is received from mnodes, numOfMnodes:%d", pCfg->dnodeId, pCfg->mnodes.nodeNum);
+  for (int i = 0; i < pCfg->mnodes.nodeNum; ++i) {  // NOTE(review): nodeNum comes from the message and is not range-checked here
+    pCfg->mnodes.nodeInfos[i].nodeId = ntohl(pCfg->mnodes.nodeInfos[i].nodeId);
+    dDebug("mnode index:%d, mnode:%d:%s", i, pCfg->mnodes.nodeInfos[i].nodeId, pCfg->mnodes.nodeInfos[i].nodeEp);
+  }
+
+  dnodeStartMnode(&pCfg->mnodes);  // return value (false when the mnode module is already started) is not inspected
+
+  return TSDB_CODE_SUCCESS;
+}
+
 void dnodeUpdateMnodeEpSetForPeer(SRpcEpSet *pEpSet) {
  if (pEpSet->numOfEps <= 0) {
    dError("mnode EP list for peer is changed, but content is invalid, discard it");
@@ -465,29 +491,6 @@ void dnodeUpdateMnodeEpSetForPeer(SRpcEpSet *pEpSet) {
  for (int i = 0; i < pEpSet->numOfEps; ++i) {
    pEpSet->port[i] -= TSDB_PORT_DNODEDNODE;
    dInfo("mnode index:%d %s:%u", i, pEpSet->fqdn[i], pEpSet->port[i]);
-
-    if (!mnodeIsRunning()) {
-      if (strcmp(pEpSet->fqdn[i], tsLocalFqdn) == 0 && pEpSet->port[i] == tsServerPort) {
-        dInfo("mnode index:%d %s:%u should work as mnode", i, pEpSet->fqdn[i], pEpSet->port[i]);
-        bool find = false;
-        for (int i = 0; i < tsDMnodeInfos.nodeNum; ++i) {
-          if (tsDMnodeInfos.nodeInfos[i].nodeId == dnodeGetDnodeId()) {
-            dInfo("localEp found in mnode infos");
-            find = true;
-            break;
-          }
-        }
-
-        if (!find) {
-          dInfo("localEp not found in mnode infos, will set into mnode infos");
-          tstrncpy(tsDMnodeInfos.nodeInfos[tsDMnodeInfos.nodeNum].nodeEp, tsLocalEp, TSDB_EP_LEN);
-          tsDMnodeInfos.nodeInfos[tsDMnodeInfos.nodeNum].nodeId = dnodeGetDnodeId();
-          tsDMnodeInfos.nodeNum++;
-        }
-
-        dnodeStartMnode();
-      }
-    }
  }

  tsDMnodeEpSet = *pEpSet;
@@ -532,7 +535,9 @@ static void dnodeProcessStatusRsp(SRpcMsg *pMsg) {
  }

  vnodeSetAccess(pStatusRsp->vgAccess, pCfg->numOfVnodes);
-  dnodeProcessModuleStatus(pCfg->moduleStatus);
+
+  // will not set mnode in status msg
+  // dnodeProcessModuleStatus(pCfg->moduleStatus);
  dnodeUpdateDnodeCfg(pCfg);

  dnodeUpdateMnodeInfos(pMnodes);
@@ -576,7 +581,7 @@ static void dnodeUpdateMnodeInfos(SDMMnodeInfos *pMnodes) {
  }

  dnodeSaveMnodeInfos();
-  sdbUpdateSync();
+  sdbUpdateAsync();
 }

 static bool dnodeReadMnodeInfos() {

--- a/src/dnode/src/dnodeModule.c
+++ b/src/dnode/src/dnodeModule.c
@@ -146,7 +146,9 @@ void dnodeProcessModuleStatus(uint32_t moduleStatus) {
  }
 }

-bool dnodeStartMnode() {
+bool dnodeStartMnode(void *pMnodes) {
+  SDMMnodeInfos *mnodes = pMnodes;
+
  if (tsModuleStatus & (1 << TSDB_MOD_MNODE)) {
    dDebug("mnode module is already started, module status:%d", tsModuleStatus);
    return false;
@@ -156,6 +158,7 @@ bool dnodeStartMnode() {
  dInfo("start mnode module, module status:%d, new status:%d", tsModuleStatus, moduleStatus);
  dnodeProcessModuleStatus(moduleStatus);

-  sdbUpdateSync();
+  sdbUpdateSync(mnodes);
+
  return true;
 }
--- a/src/dnode/src/dnodePeer.c
+++ b/src/dnode/src/dnodePeer.c
@@ -48,6 +48,7 @@ int32_t dnodeInitServer() {
  dnodeProcessReqMsgFp[TSDB_MSG_TYPE_MD_DROP_VNODE]   = dnodeDispatchToMgmtQueue;
  dnodeProcessReqMsgFp[TSDB_MSG_TYPE_MD_ALTER_STREAM] = dnodeDispatchToMgmtQueue;
  dnodeProcessReqMsgFp[TSDB_MSG_TYPE_MD_CONFIG_DNODE] = dnodeDispatchToMgmtQueue;
+  dnodeProcessReqMsgFp[TSDB_MSG_TYPE_MD_CREATE_MNODE] = dnodeDispatchToMgmtQueue;

  dnodeProcessReqMsgFp[TSDB_MSG_TYPE_DM_CONFIG_TABLE] = dnodeDispatchToMnodePeerQueue;
  dnodeProcessReqMsgFp[TSDB_MSG_TYPE_DM_CONFIG_VNODE] = dnodeDispatchToMnodePeerQueue;
@@ -170,8 +171,12 @@ void dnodeSendMsgToDnode(SRpcEpSet *epSet, SRpcMsg *rpcMsg) {
  rpcSendRequest(tsDnodeClientRpc, epSet, rpcMsg);
 }

-void dnodeSendMsgToDnodeRecv(SRpcMsg *rpcMsg, SRpcMsg *rpcRsp) {
+void dnodeSendMsgToMnodeRecv(SRpcMsg *rpcMsg, SRpcMsg *rpcRsp) {  // target is the ep set filled by dnodeGetMnodeEpSetForPeer
  SRpcEpSet epSet = {0};
  dnodeGetMnodeEpSetForPeer(&epSet);
  rpcSendRecv(tsDnodeClientRpc, &epSet, rpcMsg, rpcRsp);
 }
+
+void dnodeSendMsgToDnodeRecv(SRpcMsg *rpcMsg, SRpcMsg *rpcRsp, SRpcEpSet *epSet) {  // caller supplies the endpoint set
+  rpcSendRecv(tsDnodeClientRpc, epSet, rpcMsg, rpcRsp);  // forwards to rpcSendRecv on the dnode client rpc handle
+}
\ No newline at end of file
--- a/src/dnode/src/dnodeShell.c
+++ b/src/dnode/src/dnodeShell.c
@@ -156,7 +156,7 @@ static int dnodeRetrieveUserAuthInfo(char *user, char *spi, char *encrypt, char
  
  dDebug("user:%s, send auth msg to mnodes", user);
  SRpcMsg rpcRsp = {0};
-  dnodeSendMsgToDnodeRecv(&rpcMsg, &rpcRsp);
+  dnodeSendMsgToMnodeRecv(&rpcMsg, &rpcRsp);

  if (rpcRsp.code != 0) {
    dError("user:%s, auth msg received from mnodes, error:%s", user, tstrerror(rpcRsp.code));
@@ -189,7 +189,7 @@ void *dnodeSendCfgTableToRecv(int32_t vgId, int32_t sid) {
  rpcMsg.msgType = TSDB_MSG_TYPE_DM_CONFIG_TABLE;

  SRpcMsg rpcRsp = {0};
-  dnodeSendMsgToDnodeRecv(&rpcMsg, &rpcRsp);
+  dnodeSendMsgToMnodeRecv(&rpcMsg, &rpcRsp);
  terrno = rpcRsp.code;
  
  if (rpcRsp.code != 0) {

--- a/src/dnode/src/dnodeTelemetry.c
+++ b/src/dnode/src/dnodeTelemetry.c
@@ -178,6 +178,7 @@ static void addVersionInfo(SBufferWriter* bw) {
  addStringField(bw, "version", version);
  addStringField(bw, "buildInfo", buildinfo);
  addStringField(bw, "gitInfo", gitinfo);  
+  addStringField(bw, "email", tsEmail);  
 }

 static void addRuntimeInfo(SBufferWriter* bw) {
@@ -261,11 +262,27 @@ static void* telemetryThread(void* param) {
  return NULL;
 }

+static void dnodeGetEmail(char* filepath) {  // loads the contents of filepath into the global tsEmail buffer
+  int fd = open(filepath, O_RDONLY);
+  if (fd < 0) {  // a file that cannot be opened is skipped silently; tsEmail is left untouched
+    return;
+  }
+
+  if (taosTRead(fd, (void *)tsEmail, TSDB_FQDN_LEN - 1) < 0) {  // keep one byte spare; assumes tsEmail holds TSDB_FQDN_LEN zeroed bytes — TODO confirm
+    dError("failed to read %d bytes from file %s since %s", TSDB_FQDN_LEN - 1, filepath, strerror(errno));
+  }
+
+  close(fd);
+}
+
+
 int32_t dnodeInitTelemetry() {
  if (!tsEnableTelemetryReporting) {
    return 0;
  }

+  dnodeGetEmail("/usr/local/taos/email");  
+
  if (tsem_init(&tsExitSem, 0, 0) == -1) {
    // just log the error, it is ok for telemetry to fail
    dTrace("failed to create semaphore for telemetry, reason:%s", strerror(errno));

--- a/src/dnode/src/dnodeVRead.c
+++ b/src/dnode/src/dnodeVRead.c
@@ -199,7 +199,7 @@ static void *dnodeProcessReadQueue(void *param) {

  while (1) {
    if (taosReadQitemFromQset(readQset, &type, (void **)&pReadMsg, &pVnode) == 0) {
-      dDebug("dnodeProcessReadQueee: got no message from qset, exiting...");
+      dDebug("qset:%p dnode read got no message from qset, exiting", readQset);
      break;
    }


--- a/src/dnode/src/dnodeVWrite.c
+++ b/src/dnode/src/dnodeVWrite.c
@@ -222,7 +222,7 @@ static void *dnodeProcessWriteQueue(void *param) {
  while (1) {
    numOfMsgs = taosReadAllQitemsFromQset(pWorker->qset, pWorker->qall, &pVnode);
    if (numOfMsgs == 0) {
-      dDebug("dnodeProcessWriteQueee: got no message from qset, exiting...");
+      dDebug("qset:%p, dnode write got no message from qset, exiting", pWorker->qset);
      break;
    }


--- a/src/inc/dnode.h
+++ b/src/inc/dnode.h
@@ -43,11 +43,12 @@ void    dnodeGetMnodeEpSetForPeer(void *epSet);
 void    dnodeGetMnodeEpSetForShell(void *epSet);
 void *  dnodeGetMnodeInfos();
 int32_t dnodeGetDnodeId();
-bool    dnodeStartMnode();
+bool    dnodeStartMnode(void *pModes);

 void  dnodeAddClientRspHandle(uint8_t msgType, void (*fp)(SRpcMsg *rpcMsg));
 void  dnodeSendMsgToDnode(SRpcEpSet *epSet, SRpcMsg *rpcMsg);
-void  dnodeSendMsgToDnodeRecv(SRpcMsg *rpcMsg, SRpcMsg *rpcRsp);
+void  dnodeSendMsgToMnodeRecv(SRpcMsg *rpcMsg, SRpcMsg *rpcRsp);
+void  dnodeSendMsgToDnodeRecv(SRpcMsg *rpcMsg, SRpcMsg *rpcRsp, SRpcEpSet *epSet);
 void *dnodeSendCfgTableToRecv(int32_t vgId, int32_t sid);

 void *dnodeAllocateVnodeWqueue(void *pVnode);

--- a/src/inc/mnode.h
+++ b/src/inc/mnode.h
@@ -60,7 +60,8 @@ int32_t mnodeInitSystem();
 int32_t mnodeStartSystem();
 void    mnodeCleanupSystem();
 void    mnodeStopSystem();
-void    sdbUpdateSync();
+void    sdbUpdateAsync();
+void    sdbUpdateSync(void *pMnodes);
 bool    mnodeIsRunning();
 int32_t mnodeProcessRead(SMnodeMsg *pMsg);
 int32_t mnodeProcessWrite(SMnodeMsg *pMsg);

--- a/src/inc/taoserror.h
+++ b/src/inc/taoserror.h
@@ -139,6 +139,8 @@ TAOS_DEFINE_ERROR(TSDB_CODE_MND_VGROUP_ALREADY_IN_DNODE,  0, 0x0339, "Vgroup alr
 TAOS_DEFINE_ERROR(TSDB_CODE_MND_DNODE_NOT_FREE,           0, 0x033A, "Dnode not avaliable")
 TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_CLUSTER_ID,       0, 0x033B, "Cluster id not match")
 TAOS_DEFINE_ERROR(TSDB_CODE_MND_NOT_READY,                0, 0x033C, "Cluster not ready")
+TAOS_DEFINE_ERROR(TSDB_CODE_MND_DNODE_ID_NOT_CONFIGURED,  0, 0x033D, "Dnode Id not configured")
+TAOS_DEFINE_ERROR(TSDB_CODE_MND_DNODE_EP_NOT_CONFIGURED,  0, 0x033E, "Dnode Ep not configured")

 TAOS_DEFINE_ERROR(TSDB_CODE_MND_ACCT_ALREADY_EXIST,       0, 0x0340, "Account already exists")
 TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_ACCT,             0, 0x0341, "Invalid account")

--- a/src/inc/taosmsg.h
+++ b/src/inc/taosmsg.h
@@ -59,7 +59,7 @@ TAOS_DEFINE_MESSAGE_TYPE( TSDB_MSG_TYPE_MD_DROP_STABLE, "drop-stable" )
 TAOS_DEFINE_MESSAGE_TYPE( TSDB_MSG_TYPE_MD_ALTER_STREAM, "alter-stream" )
 TAOS_DEFINE_MESSAGE_TYPE( TSDB_MSG_TYPE_MD_CONFIG_DNODE, "config-dnode" )
 TAOS_DEFINE_MESSAGE_TYPE( TSDB_MSG_TYPE_MD_ALTER_VNODE, "alter-vnode" )
-TAOS_DEFINE_MESSAGE_TYPE( TSDB_MSG_TYPE_DUMMY5, "dummy5" )
+TAOS_DEFINE_MESSAGE_TYPE( TSDB_MSG_TYPE_MD_CREATE_MNODE, "create-mnode" )
 TAOS_DEFINE_MESSAGE_TYPE( TSDB_MSG_TYPE_DUMMY6, "dummy6" )
 TAOS_DEFINE_MESSAGE_TYPE( TSDB_MSG_TYPE_DUMMY7, "dummy7" )

@@ -719,6 +719,12 @@ typedef struct {
  char     ep[TSDB_EP_LEN];  // end point, hostname:port
 } SCMCreateDnodeMsg, SCMDropDnodeMsg;

+typedef struct {
+  int32_t dnodeId;
+  char    dnodeEp[TSDB_EP_LEN];  // end point, hostname:port
+  SDMMnodeInfos mnodes;
+} SMDCreateMnodeMsg;
+
 typedef struct {
  int32_t dnodeId;
  int32_t vgId;

--- a/src/mnode/inc/mnodeMnode.h
+++ b/src/mnode/inc/mnodeMnode.h
@@ -31,7 +31,7 @@ typedef enum {
 int32_t mnodeInitMnodes();
 void    mnodeCleanupMnodes();

-int32_t mnodeAddMnode(int32_t dnodeId);
+void    mnodeCreateMnode(int32_t dnodeId, char *dnodeEp, bool needConfirm);
 int32_t mnodeDropMnode(int32_t dnodeId);
 void    mnodeDropMnodeLocal(int32_t dnodeId);


--- a/src/mnode/src/mnodeCluster.c
+++ b/src/mnode/src/mnodeCluster.c
@@ -224,7 +224,7 @@ static int32_t mnodeRetrieveClusters(SShowObj *pShow, char *data, int32_t rows,
    mnodeDecClusterRef(pCluster);
    numOfRows++;
  }
-
+  mnodeVacuumResult(data, cols, numOfRows, rows, pShow);
  pShow->numOfReads += numOfRows;
  return numOfRows;
 }
--- a/src/mnode/src/mnodeDb.c
+++ b/src/mnode/src/mnodeDb.c
@@ -760,6 +760,8 @@ static int32_t mnodeRetrieveDbs(SShowObj *pShow, char *data, int32_t rows, void
  }

  pShow->numOfReads += numOfRows;
+  mnodeVacuumResult(data, cols, numOfRows, rows, pShow);
+
  mnodeDecUserRef(pUser);
  return numOfRows;
 }

--- a/src/mnode/src/mnodeDnode.c
+++ b/src/mnode/src/mnodeDnode.c
@@ -147,7 +147,7 @@ static int32_t mnodeDnodeActionRestored() {
    mnodeCreateDnode(tsLocalEp, NULL);
    SDnodeObj *pDnode = mnodeGetDnodeByEp(tsLocalEp);
    if (pDnode != NULL) {
-      mnodeAddMnode(pDnode->dnodeId);
+      mnodeCreateMnode(pDnode->dnodeId, pDnode->dnodeEp, false);
      mnodeDecDnodeRef(pDnode);
    }
  }
@@ -857,6 +857,7 @@ int32_t mnodeRetrieveModules(SShowObj *pShow, char *data, int32_t rows, void *pC

  char* pWrite;
  char* moduleName[5] = { "MNODE", "HTTP", "MONITOR", "MQTT", "UNKNOWN" };
+  int32_t cols;

  while (numOfRows < rows) {
    SDnodeObj *pDnode = NULL;
@@ -864,7 +865,7 @@ int32_t mnodeRetrieveModules(SShowObj *pShow, char *data, int32_t rows, void *pC
    if (pDnode == NULL) break;

    for (int32_t moduleType = 0; moduleType < TSDB_MOD_MAX; ++moduleType) {
-      int32_t cols = 0;
+      cols = 0;

      pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows;
      *(int16_t *)pWrite = pDnode->dnodeId;
@@ -890,6 +891,7 @@ int32_t mnodeRetrieveModules(SShowObj *pShow, char *data, int32_t rows, void *pC

    mnodeDecDnodeRef(pDnode);
  }
+  mnodeVacuumResult(data, cols, numOfRows, rows, pShow);

  pShow->numOfReads += numOfRows;
  return numOfRows;
@@ -1081,6 +1083,7 @@ static int32_t mnodeRetrieveVnodes(SShowObj *pShow, char *data, int32_t rows, vo
  } else {
    numOfRows = 0;
  }
+  mnodeVacuumResult(data, cols, numOfRows, rows, pShow);

  pShow->numOfReads += numOfRows;
  return numOfRows;

--- a/src/mnode/src/mnodeMain.c
+++ b/src/mnode/src/mnodeMain.c
@@ -109,7 +109,7 @@ int32_t mnodeStartSystem() {

  mInfo("mnode is initialized successfully");

-  sdbUpdateSync();
+  sdbUpdateSync(NULL);

  return 0;
 }

--- a/src/mnode/src/mnodeMnode.c
+++ b/src/mnode/src/mnodeMnode.c
@@ -23,6 +23,8 @@
 #include "tutil.h"
 #include "tsocket.h"
 #include "tdataformat.h"
+#include "dnode.h"
+#include "mnode.h"
 #include "mnodeDef.h"
 #include "mnodeInt.h"
 #include "mnodeMnode.h"
@@ -30,6 +32,7 @@
 #include "mnodeSdb.h"
 #include "mnodeShow.h"
 #include "mnodeUser.h"
+#include "mnodeVgroup.h"

 static void *        tsMnodeSdb = NULL;
 static int32_t       tsMnodeUpdateSize = 0;
@@ -266,7 +269,61 @@ void mnodeGetMnodeInfos(void *mnodeInfos) {
  mnodeMnodeUnLock();
 }

-int32_t mnodeAddMnode(int32_t dnodeId) {
+static int32_t mnodeSendCreateMnodeMsg(int32_t dnodeId, char *dnodeEp) {  // returns the code of the rpc response
+  mDebug("dnode:%d, send create mnode msg to dnode %s", dnodeId, dnodeEp);
+
+  SMDCreateMnodeMsg *pCreate = rpcMallocCont(sizeof(SMDCreateMnodeMsg));
+  if (pCreate == NULL) {
+    return TSDB_CODE_MND_OUT_OF_MEMORY;
+  } else {
+    pCreate->dnodeId = htonl(dnodeId);
+    tstrncpy(pCreate->dnodeEp, dnodeEp, sizeof(pCreate->dnodeEp));
+    pCreate->mnodes = tsMnodeInfos;  // start from a copy of the current mnode list
+    bool found = false;
+    for (int i = 0; i < pCreate->mnodes.nodeNum; ++i) {
+      if (pCreate->mnodes.nodeInfos[i].nodeId == htonl(dnodeId)) {  // presumably list ids are in network order — TODO confirm
+        found = true;
+      }
+    }
+    if (!found) {  // append the target dnode; NOTE(review): nodeNum is not checked against the array capacity
+      pCreate->mnodes.nodeInfos[pCreate->mnodes.nodeNum].nodeId = htonl(dnodeId);
+      tstrncpy(pCreate->mnodes.nodeInfos[pCreate->mnodes.nodeNum].nodeEp, dnodeEp, sizeof(pCreate->mnodes.nodeInfos[0].nodeEp));
+      pCreate->mnodes.nodeNum++;
+    }
+  }
+
+  SRpcMsg rpcMsg = {0};
+  rpcMsg.pCont = pCreate;
+  rpcMsg.contLen = sizeof(SMDCreateMnodeMsg);
+  rpcMsg.msgType = TSDB_MSG_TYPE_MD_CREATE_MNODE;
+
+  SRpcMsg   rpcRsp = {0};
+  SRpcEpSet epSet = mnodeGetEpSetFromIp(pCreate->dnodeEp);  // built before the send, while pCreate is still in hand
+  dnodeSendMsgToDnodeRecv(&rpcMsg, &rpcRsp, &epSet);
+
+  if (rpcRsp.code != TSDB_CODE_SUCCESS) {
+    mError("dnode:%d, failed to send create mnode msg, ep:%s reason:%s", dnodeId, dnodeEp, tstrerror(rpcRsp.code));
+  } else {
+    mDebug("dnode:%d, create mnode msg is disposed, mnode is created in dnode", dnodeId);
+  }
+
+  rpcFreeCont(rpcRsp.pCont);  // the response body is released here; only its code is returned
+  return rpcRsp.code;
+}
+
+static int32_t mnodeCreateMnodeCb(SMnodeMsg *pMsg, int32_t code) {  // installed as .writeCb of the insert in mnodeCreateMnode
+  if (code != TSDB_CODE_SUCCESS) {
+    mError("failed to create mnode, reason:%s", tstrerror(code));
+  } else {
+    mDebug("mnode is created successfully");
+    mnodeUpdateMnodeEpSet();
+    sdbUpdateAsync();  // sync-config refresh is scheduled rather than run inline
+  }
+
+  return code;  // the incoming code is passed back unchanged; pMsg is not used
+}
+
+void mnodeCreateMnode(int32_t dnodeId, char *dnodeEp, bool needConfirm) {
  SMnodeObj *pMnode = calloc(1, sizeof(SMnodeObj));
  pMnode->mnodeId = dnodeId;
  pMnode->createdTime = taosGetTimestampMs();
@@ -275,16 +332,24 @@ int32_t mnodeAddMnode(int32_t dnodeId) {
    .type  = SDB_OPER_GLOBAL,
    .table = tsMnodeSdb,
    .pObj  = pMnode,
+    .writeCb = mnodeCreateMnodeCb
  };

-  int32_t code = sdbInsertRow(&oper);
-  if (code != TSDB_CODE_SUCCESS && code != TSDB_CODE_MND_ACTION_IN_PROGRESS) {
-    taosTFree(pMnode);
+  int32_t code = TSDB_CODE_SUCCESS;
+  if (needConfirm) {
+    code = mnodeSendCreateMnodeMsg(dnodeId, dnodeEp);
  }

-  mnodeUpdateMnodeEpSet();
+  if (code != TSDB_CODE_SUCCESS) {
+    taosTFree(pMnode);
+    return;
+  }

-  return code;
+  code = sdbInsertRow(&oper);
+  if (code != TSDB_CODE_SUCCESS && code != TSDB_CODE_MND_ACTION_IN_PROGRESS) {
+    mError("dnode:%d, failed to create mnode, ep:%s reason:%s", dnodeId, dnodeEp, tstrerror(code));
+    taosTFree(pMnode);
+  }
 }

 void mnodeDropMnodeLocal(int32_t dnodeId) {
@@ -296,6 +361,7 @@ void mnodeDropMnodeLocal(int32_t dnodeId) {
  }

  mnodeUpdateMnodeEpSet();
+  sdbUpdateAsync();
 }

 int32_t mnodeDropMnode(int32_t dnodeId) {
@@ -315,6 +381,7 @@ int32_t mnodeDropMnode(int32_t dnodeId) {
  sdbDecRef(tsMnodeSdb, pMnode);

  mnodeUpdateMnodeEpSet();
+  sdbUpdateAsync();

  return code;
 }
@@ -413,6 +480,7 @@ static int32_t mnodeRetrieveMnodes(SShowObj *pShow, char *data, int32_t rows, vo

    mnodeDecMnodeRef(pMnode);
  }
+  mnodeVacuumResult(data, cols, numOfRows, rows, pShow);

  pShow->numOfReads += numOfRows;


--- a/src/mnode/src/mnodePeer.c
+++ b/src/mnode/src/mnodePeer.c
@@ -58,10 +58,15 @@ int32_t mnodeProcessPeerReq(SMnodeMsg *pMsg) {
    rpcRsp->rsp = epSet;
    rpcRsp->len = sizeof(SRpcEpSet);

-    mDebug("%p, msg:%s in mpeer queue, will be redireced, numOfEps:%d inUse:%d", pMsg->rpcMsg.ahandle,
+    mDebug("%p, msg:%s in mpeer queue will be redirected, numOfEps:%d inUse:%d", pMsg->rpcMsg.ahandle,
           taosMsg[pMsg->rpcMsg.msgType], epSet->numOfEps, epSet->inUse);
    for (int32_t i = 0; i < epSet->numOfEps; ++i) {
-      mDebug("mnode index:%d ep:%s:%d", i, epSet->fqdn[i], htons(epSet->port[i]));
+      if (strcmp(epSet->fqdn[i], tsLocalFqdn) == 0 && htons(epSet->port[i]) == tsServerPort + TSDB_PORT_DNODEDNODE) {
+        epSet->inUse = (i + 1) % epSet->numOfEps;
+        mDebug("mnode index:%d ep:%s:%u, set inUse to %d", i, epSet->fqdn[i], htons(epSet->port[i]), epSet->inUse);
+      } else {
+        mDebug("mnode index:%d ep:%s:%u", i, epSet->fqdn[i], htons(epSet->port[i]));
+      }
    }

    return TSDB_CODE_RPC_REDIRECT;

--- a/src/mnode/src/mnodeRead.c
+++ b/src/mnode/src/mnodeRead.c
@@ -51,14 +51,21 @@ int32_t mnodeProcessRead(SMnodeMsg *pMsg) {
    SMnodeRsp *rpcRsp = &pMsg->rpcRsp;
    SRpcEpSet *epSet = rpcMallocCont(sizeof(SRpcEpSet));
    mnodeGetMnodeEpSetForShell(epSet);
-    rpcRsp->rsp = epSet;
-    rpcRsp->len = sizeof(SRpcEpSet);

-    mDebug("%p, msg:%s in mread queue, will be redireced, inUse:%d", pMsg->rpcMsg.ahandle, taosMsg[pMsg->rpcMsg.msgType], epSet->inUse);
+    mDebug("%p, msg:%s in mread queue will be redirected, numOfEps:%d inUse:%d", pMsg->rpcMsg.ahandle,
+           taosMsg[pMsg->rpcMsg.msgType], epSet->numOfEps, epSet->inUse);
    for (int32_t i = 0; i < epSet->numOfEps; ++i) {
-      mDebug("mnode index:%d ep:%s:%d", i, epSet->fqdn[i], htons(epSet->port[i]));
+      if (strcmp(epSet->fqdn[i], tsLocalFqdn) == 0 && htons(epSet->port[i]) == tsServerPort) {
+        epSet->inUse = (i + 1) % epSet->numOfEps;
+        mDebug("mnode index:%d ep:%s:%u, set inUse to %d", i, epSet->fqdn[i], htons(epSet->port[i]), epSet->inUse);
+      } else {
+        mDebug("mnode index:%d ep:%s:%u", i, epSet->fqdn[i], htons(epSet->port[i]));
+      }
    }

+    rpcRsp->rsp = epSet;
+    rpcRsp->len = sizeof(SRpcEpSet);
+
    return TSDB_CODE_RPC_REDIRECT;
  }


--- a/src/mnode/src/mnodeSdb.c
+++ b/src/mnode/src/mnodeSdb.c
@@ -91,6 +91,7 @@ typedef struct {
 } SSdbWriteWorkerPool;

 extern void *     tsMnodeTmr;
+static void *     tsUpdateSyncTmr;
 static SSdbObject tsSdbObj = {0};
 static taos_qset  tsSdbWriteQset;
 static taos_qall  tsSdbWriteQall;
@@ -297,27 +298,25 @@ static void sdbConfirmForward(void *ahandle, void *param, int32_t code) {
  taosFreeQitem(pOper);
 }

-void sdbUpdateSync() {
+static void sdbUpdateSyncTmrFp(void *param, void *tmrId) { sdbUpdateSync(NULL); }  // timer callback: no explicit mnode list is passed
+
+void sdbUpdateAsync() {  // schedules sdbUpdateSync(NULL) through the timer instead of calling it directly
+  taosTmrReset(sdbUpdateSyncTmrFp, 200, NULL, tsMnodeTmr, &tsUpdateSyncTmr);  // delay value 200 — presumably milliseconds; TODO confirm
+}
+
+void sdbUpdateSync(void *pMnodes) {
+  SDMMnodeInfos *mnodes = pMnodes;
  if (!mnodeIsRunning()) {
-    mDebug("mnode not start yet, update sync info later");
+    mDebug("mnode not start yet, update sync config later");
    return;
  }

-  mDebug("update sync info in sdb");
+  mDebug("update sync config in sync module, mnodes:%p", pMnodes);

  SSyncCfg syncCfg = {0};
  int32_t  index = 0;

-  SDMMnodeInfos *mnodes = dnodeGetMnodeInfos();
-  for (int32_t i = 0; i < mnodes->nodeNum; ++i) {
-    SDMMnodeInfo *node = &mnodes->nodeInfos[i];
-    syncCfg.nodeInfo[i].nodeId = node->nodeId;
-    taosGetFqdnPortFromEp(node->nodeEp, syncCfg.nodeInfo[i].nodeFqdn, &syncCfg.nodeInfo[i].nodePort);
-    syncCfg.nodeInfo[i].nodePort += TSDB_PORT_SYNC;
-    index++;
-  }
-
-  if (index == 0) {
+  if (mnodes == NULL) {
    void *pIter = NULL;
    while (1) {
      SMnodeObj *pMnode = NULL;
@@ -337,9 +336,19 @@ void sdbUpdateSync() {
      mnodeDecMnodeRef(pMnode);
    }
    sdbFreeIter(pIter);
+    syncCfg.replica = index;
+    mDebug("mnodes info not input, use infos in sdb, numOfMnodes:%d", syncCfg.replica);
+  } else {
+    for (index = 0; index < mnodes->nodeNum; ++index) {
+      SDMMnodeInfo *node = &mnodes->nodeInfos[index];
+      syncCfg.nodeInfo[index].nodeId = node->nodeId;
+      taosGetFqdnPortFromEp(node->nodeEp, syncCfg.nodeInfo[index].nodeFqdn, &syncCfg.nodeInfo[index].nodePort);
+      syncCfg.nodeInfo[index].nodePort += TSDB_PORT_SYNC;
    }
-
    syncCfg.replica = index;
+    mDebug("mnodes info input, numOfMnodes:%d", syncCfg.replica);
+  }
+
  syncCfg.quorum = (syncCfg.replica == 1) ? 1 : 2;

  bool hasThisDnode = false;
@@ -350,8 +359,15 @@ void sdbUpdateSync() {
    }
  }

-  if (!hasThisDnode) return;
-  if (memcmp(&syncCfg, &tsSdbObj.cfg, sizeof(SSyncCfg)) == 0) return;
+  if (!hasThisDnode) {
+    sdbDebug("update sync config, this dnode not exist");
+    return;
+  }
+
+  if (memcmp(&syncCfg, &tsSdbObj.cfg, sizeof(SSyncCfg)) == 0) {
+    sdbDebug("update sync config, info not changed");
+    return;
+  }

  sdbInfo("work as mnode, replica:%d", syncCfg.replica);
  for (int32_t i = 0; i < syncCfg.replica; ++i) {
@@ -1038,7 +1054,7 @@ static void *sdbWorkerFp(void *param) {
  while (1) {
    numOfMsgs = taosReadAllQitemsFromQset(tsSdbWriteQset, tsSdbWriteQall, &unUsed);
    if (numOfMsgs == 0) {
-      sdbDebug("sdbWorkerFp: got no message from qset, exiting...");
+      sdbDebug("qset:%p, sdb got no message from qset, exiting", tsSdbWriteQset);
      break;
    }


--- a/src/mnode/src/mnodeUser.c
+++ b/src/mnode/src/mnodeUser.c
@@ -385,6 +385,7 @@ static int32_t mnodeRetrieveUsers(SShowObj *pShow, char *data, int32_t rows, voi
    numOfRows++;
    mnodeDecUserRef(pUser);
  }
+  mnodeVacuumResult(data, cols, numOfRows, rows, pShow);

  pShow->numOfReads += numOfRows;
  return numOfRows;

--- a/src/mnode/src/mnodeVgroup.c
+++ b/src/mnode/src/mnodeVgroup.c
@@ -310,7 +310,7 @@ void mnodeUpdateVgroupStatus(SVgObj *pVgroup, SDnodeObj *pDnode, SVnodeLoad *pVl
  for (int32_t i = 0; i < pVgroup->numOfVnodes; ++i) {
    SVnodeGid *pVgid = &pVgroup->vnodeGid[i];
    if (pVgid->pDnode == pDnode) {
-      mTrace("dnode:%d, receive status from dnode, vgId:%d status is %d", pDnode->dnodeId, pVgroup->vgId, pVgid->role);
+      mTrace("dnode:%d, receive status from dnode, vgId:%d status is %d:%s", pDnode->dnodeId, pVgroup->vgId, pVgid->role, syncRole[pVgid->role]);
      pVgid->role = pVload->role;
      if (pVload->role == TAOS_SYNC_ROLE_MASTER) {
        pVgroup->inUse = i;
@@ -771,6 +771,7 @@ static int32_t mnodeRetrieveVgroups(SShowObj *pShow, char *data, int32_t rows, v
    mnodeDecVgroupRef(pVgroup);
    numOfRows++;
  }
+  mnodeVacuumResult(data, cols, numOfRows, rows, pShow);

  pShow->numOfReads += numOfRows;
  mnodeDecTableRef(pTable);

--- a/src/mnode/src/mnodeWrite.c
+++ b/src/mnode/src/mnodeWrite.c
@@ -54,12 +54,18 @@ int32_t mnodeProcessWrite(SMnodeMsg *pMsg) {
    rpcRsp->rsp = epSet;
    rpcRsp->len = sizeof(SRpcEpSet);

-    mDebug("app:%p:%p, msg:%s will be redireced inUse:%d", pMsg->rpcMsg.ahandle, pMsg, taosMsg[pMsg->rpcMsg.msgType],
-           epSet->inUse);
+    mDebug("app:%p:%p, msg:%s in write queue, will be redirected, numOfEps:%d inUse:%d", pMsg->rpcMsg.ahandle, pMsg,
+           taosMsg[pMsg->rpcMsg.msgType], epSet->numOfEps, epSet->inUse);
    for (int32_t i = 0; i < epSet->numOfEps; ++i) {
+      if (strcmp(epSet->fqdn[i], tsLocalFqdn) == 0 && htons(epSet->port[i]) == tsServerPort) {
+        epSet->inUse = (i + 1) % epSet->numOfEps;
+        mDebug("app:%p:%p, mnode index:%d ep:%s:%d, set inUse to %d", pMsg->rpcMsg.ahandle, pMsg, i, epSet->fqdn[i],
+               htons(epSet->port[i]), epSet->inUse);
+      } else {
        mDebug("app:%p:%p, mnode index:%d ep:%s:%d", pMsg->rpcMsg.ahandle, pMsg, i, epSet->fqdn[i],
               htons(epSet->port[i]));
      }
+    }

    return TSDB_CODE_RPC_REDIRECT;
  }

--- a/src/plugins/http/src/httpQueue.c
+++ b/src/plugins/http/src/httpQueue.c
@@ -67,7 +67,7 @@ static void *httpProcessResultQueue(void *param) {
  
  while (1) {
    if (taosReadQitemFromQset(tsHttpQset, &type, (void **)&pMsg, &unUsed) == 0) {
-      httpDebug("httpResultQueue: got no message from qset, exiting...");
+      httpDebug("qset:%p, http queue got no message from qset, exiting", tsHttpQset);
      break;
    }


--- a/src/query/src/qExecutor.c
+++ b/src/query/src/qExecutor.c
@@ -4511,7 +4511,6 @@ int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bo
  int32_t code = TSDB_CODE_SUCCESS;
  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;

-  pQuery->precision = tsdbGetCfg(tsdb)->precision;
  pRuntimeEnv->topBotQuery = isTopBottomQuery(pQuery);
  pRuntimeEnv->hasTagResults = hasTagValOutput(pQuery);

@@ -6324,6 +6323,8 @@ static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQ
    UNUSED(ret);
  }
  
+  pQuery->precision = tsdbGetCfg(tsdb)->precision;
+
  if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) ||
      (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) {
    qDebug("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey,

--- a/src/rpc/src/rpcMain.c
+++ b/src/rpc/src/rpcMain.c
@@ -542,10 +542,7 @@ void rpcCancelRequest(void *handle) {

  if (pContext->pConn) {
    tDebug("%s, app tries to cancel request", pContext->pConn->info);
-    pContext->pConn->pReqMsg = NULL;  
    rpcCloseConn(pContext->pConn);
-    pContext->pConn = NULL;
-    rpcFreeCont(pContext->pCont);
  }
 }

@@ -613,9 +610,11 @@ static void rpcReleaseConn(SRpcConn *pConn) {
    if (pConn->pReqMsg) rpcFreeCont(pConn->pReqMsg);  // do not use rpcFreeMsg
  } else {
    // if there is an outgoing message, free it
-    if (pConn->outType && pConn->pReqMsg) 
+    if (pConn->outType && pConn->pReqMsg) {
+      if (pConn->pContext) pConn->pContext->pConn = NULL; 
      rpcFreeMsg(pConn->pReqMsg);
    }
+  }

  // memset could not be used, since lockeBy can not be reset
  pConn->inType = 0;
@@ -1121,9 +1120,13 @@ static void rpcProcessIncomingMsg(SRpcConn *pConn, SRpcHead *pHead, SRpcReqConte
      SRpcEpSet *pEpSet = (SRpcEpSet*)pHead->content;
      if (pEpSet->numOfEps > 0) {
        memcpy(&pContext->epSet, pHead->content, sizeof(pContext->epSet));
-        tDebug("%s, redirect is received, numOfEps:%d", pConn->info, pContext->epSet.numOfEps);
-        for (int i=0; i<pContext->epSet.numOfEps; ++i) 
+        tDebug("%s, redirect is received, numOfEps:%d inUse:%d", pConn->info, pContext->epSet.numOfEps,
+               pContext->epSet.inUse);
+        for (int i = 0; i < pContext->epSet.numOfEps; ++i) {
          pContext->epSet.port[i] = htons(pContext->epSet.port[i]);
+          tDebug("%s, redirect is received, index:%d ep:%s:%u", pConn->info, i, pContext->epSet.fqdn[i],
+                 pContext->epSet.port[i]);
+        }
      }
      rpcSendReqToServer(pRpc, pContext);
      rpcFreeCont(rpcMsg.pCont);

--- a/src/rpc/src/rpcTcp.c
+++ b/src/rpc/src/rpcTcp.c
@@ -525,7 +525,7 @@ static void *taosProcessTcpData(void *param) {
  while (pThreadObj->pHead) {
    SFdObj *pFdObj = pThreadObj->pHead;
    pThreadObj->pHead = pFdObj->next;
-    taosFreeFdObj(pFdObj);
+    taosReportBrokenLink(pFdObj);
  }

  pthread_mutex_destroy(&(pThreadObj->mutex));

--- a/src/sync/src/syncMain.c
+++ b/src/sync/src/syncMain.c
@@ -215,6 +215,9 @@ void syncStop(void *param) {

  pthread_mutex_lock(&(pNode->mutex));

+  if (vgIdHash) taosHashRemove(vgIdHash, (const char *)&pNode->vgId, sizeof(int32_t));
+  if (pNode->pFwdTimer) taosTmrStop(pNode->pFwdTimer);
+
  for (int i = 0; i < pNode->replica; ++i) {
    pPeer = pNode->peerInfo[i];
    if (pPeer) syncRemovePeer(pPeer);
@@ -223,9 +226,6 @@ void syncStop(void *param) {
  pPeer = pNode->peerInfo[TAOS_SYNC_MAX_REPLICA];
  if (pPeer) syncRemovePeer(pPeer);

-  if (vgIdHash) taosHashRemove(vgIdHash, (const char *)&pNode->vgId, sizeof(int32_t));
-  if (pNode->pFwdTimer) taosTmrStop(pNode->pFwdTimer);
-
  pthread_mutex_unlock(&(pNode->mutex));

  syncDecNodeRef(pNode);
@@ -313,6 +313,8 @@ int32_t syncForwardToPeer(void *param, void *data, void *mhandle, int qtype) {

  // always update version
  nodeVersion = pWalHead->version;
+  sDebug("replica:%d nodeRole:%d qtype:%d", pNode->replica, nodeRole, qtype);
+
  if (pNode->replica == 1 || nodeRole != TAOS_SYNC_ROLE_MASTER) return 0;

  // only pkt from RPC or CQ can be forwarded
@@ -1189,6 +1191,8 @@ static void syncProcessFwdAck(SSyncNode *pNode, SFwdInfo *pFwdInfo, int32_t code
 static void syncMonitorFwdInfos(void *param, void *tmrId) {
  SSyncNode *pNode = param;
  SSyncFwds *pSyncFwds = pNode->pSyncFwds;
+  if (pSyncFwds == NULL) return;
+
  uint64_t   time = taosGetTimestampMs();

  if (pSyncFwds->fwds > 0) {

--- a/src/tsdb/src/tsdbRead.c
+++ b/src/tsdb/src/tsdbRead.c
@@ -2639,8 +2639,7 @@ int32_t tsdbGetTableGroupFromIdList(TSDB_REPO_T* tsdb, SArray* pTableIdList, STa
  pGroupInfo->pGroupList = taosArrayInit(1, POINTER_BYTES);
  SArray* group = taosArrayInit(1, sizeof(STableKeyInfo));

-  int32_t i = 0;
-  for(; i < size; ++i) {
+  for(int32_t i = 0; i < size; ++i) {
    STableIdInfo *id = taosArrayGet(pTableIdList, i);

    STable* pTable = tsdbGetTableByUid(tsdbGetMeta(tsdb), id->uid);
@@ -2665,8 +2664,12 @@ int32_t tsdbGetTableGroupFromIdList(TSDB_REPO_T* tsdb, SArray* pTableIdList, STa
    return terrno;
  }

-  pGroupInfo->numOfTables = i;
+  pGroupInfo->numOfTables = taosArrayGetSize(group);
+  if (pGroupInfo->numOfTables > 0) {
    taosArrayPush(pGroupInfo->pGroupList, &group);
+  } else {
+    taosArrayDestroy(group);
+  }

  return TSDB_CODE_SUCCESS;
 }

--- a/src/util/src/tqueue.c
+++ b/src/util/src/tqueue.c
@@ -263,6 +263,7 @@ void taosCloseQset(taos_qset param) {
 // thread to exit.
 void taosQsetThreadResume(taos_qset param) {
  STaosQset *qset = (STaosQset *)param;
+  uDebug("qset:%p, it will exit", qset);
  tsem_post(&qset->sem);
 }


--- a/src/wal/src/walMain.c
+++ b/src/wal/src/walMain.c
@@ -427,7 +427,7 @@ static int walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp) {
    if (!taosCheckChecksumWhole((uint8_t *)pHead, sizeof(SWalHead))) {
      wWarn("wal:%s, cksum is messed up, skip the rest of file", name);
      terrno = TSDB_CODE_WAL_FILE_CORRUPTED;
-      ASSERT(false);
+      // ASSERT(false);
      break;
    }


--- a/tests/examples/JDBC/calciteDemo/pom.xml
+++ b/tests/examples/JDBC/calciteDemo/pom.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Maven build descriptor for the calciteDemo example (entry point: CalciteDemo.java). -->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>com.taosdata.example.calcite</groupId>
+    <artifactId>calciteDemo</artifactId>
+    <version>1.0-SNAPSHOT</version>
+
+    <dependencies>
+        <!-- slf4j: the "simple" logging binding, declared at compile scope -->
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-simple</artifactId>
+            <version>1.7.25</version>
+            <scope>compile</scope>
+        </dependency>
+        <!-- calcite: Calcite core and Avatica, plus commons-dbcp2, which provides the
+             BasicDataSource that CalciteDemo wraps in a JdbcSchema -->
+        <dependency>
+            <groupId>org.apache.calcite</groupId>
+            <artifactId>calcite-core</artifactId>
+            <version>1.23.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-dbcp2</artifactId>
+            <version>2.7.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.calcite.avatica</groupId>
+            <artifactId>avatica-core</artifactId>
+            <version>1.17.0</version>
+        </dependency>
+
+        <!-- mysql: JDBC driver loaded by CalciteDemo.mysqlTest (com.mysql.jdbc.Driver) -->
+        <dependency>
+            <groupId>mysql</groupId>
+            <artifactId>mysql-connector-java</artifactId>
+            <version>5.1.47</version>
+        </dependency>
+
+        <!-- tdengine: JDBC driver loaded by CalciteDemo.tdengineTest (com.taosdata.jdbc.TSDBDriver) -->
+        <dependency>
+            <groupId>com.taosdata.jdbc</groupId>
+            <artifactId>taos-jdbcdriver</artifactId>
+            <version>2.0.8</version>
+        </dependency>
+
+    </dependencies>
+
+
+</project>
\ No newline at end of file
--- a/tests/examples/JDBC/calciteDemo/src/main/java/com/taosdata/example/calcite/CalciteDemo.java
+++ b/tests/examples/JDBC/calciteDemo/src/main/java/com/taosdata/example/calcite/CalciteDemo.java
+package com.taosdata.example.calcite;
+
+import org.apache.calcite.adapter.jdbc.JdbcSchema;
+import org.apache.calcite.jdbc.CalciteConnection;
+import org.apache.calcite.schema.Schema;
+import org.apache.calcite.schema.SchemaPlus;
+import org.apache.calcite.sql.parser.SqlParseException;
+import org.apache.commons.dbcp2.BasicDataSource;
+
+import java.sql.*;
+import java.util.Properties;
+
+/**
+ * Stand-alone demo that queries a JDBC-backed database through Apache Calcite.
+ *
+ * <p>A JDBC data source is wrapped in a Calcite {@link JdbcSchema}, registered on a
+ * {@code jdbc:calcite:} connection under the schema name "test", and every column of
+ * every row of {@code test.t} is printed to standard output.</p>
+ */
+public class CalciteDemo {
+
+    // JDBC URLs of the two back ends this demo can be pointed at.
+    private static final String url_taos = "jdbc:TAOS://192.168.236.135:6030/test";
+    private static final String url_mysql = "jdbc:mysql://master:3306/test?useSSL=false&useUnicode=true&characterEncoding=UTF-8";
+
+    /**
+     * Runs {@code select * from test.t} against the MySQL-backed schema and prints
+     * each column as {@code label : value}.
+     *
+     * @param args not used
+     * @throws SqlParseException      declared in the original signature and kept for callers
+     * @throws ClassNotFoundException if a JDBC driver class cannot be loaded
+     * @throws SQLException           if connecting or querying fails
+     */
+    public static void main(String[] args) throws SqlParseException, ClassNotFoundException, SQLException {
+        Class.forName("org.apache.calcite.jdbc.Driver");
+        Properties info = new Properties();
+        info.setProperty("caseSensitive", "false");
+
+        // try-with-resources: the connection, statement and result set are closed
+        // even when the query throws (the original never closed any of them).
+        try (Connection connection = DriverManager.getConnection("jdbc:calcite:", info)) {
+            CalciteConnection calciteConnection = connection.unwrap(CalciteConnection.class);
+            SchemaPlus rootSchema = calciteConnection.getRootSchema();
+
+            // Back-end schema to expose. Use tdengineTest(rootSchema) instead to query
+            // TDengine; there "hdb" is the name of a database created in TDengine.
+            Schema schema = mysqlTest(rootSchema);
+
+            // Register the new schema as "test"; it maps onto the back-end database.
+            rootSchema.add("test", schema);
+
+            // Query a table of schema "test"; the table name is the one used in the back end.
+            try (Statement stmt = calciteConnection.createStatement();
+                 ResultSet rs = stmt.executeQuery("select * from test.t")) {
+                ResultSetMetaData metaData = rs.getMetaData();
+                while (rs.next()) {
+                    for (int i = 1; i <= metaData.getColumnCount(); i++) {
+                        System.out.println(metaData.getColumnLabel(i) + " : " + rs.getString(i));
+                    }
+                }
+            }
+        }
+    }
+
+
+    /**
+     * Builds a Calcite schema on top of the TDengine JDBC driver.
+     *
+     * @param rootSchema parent schema the new JDBC schema is created under
+     * @return a JdbcSchema named "test" that targets the "hdb" catalog
+     * @throws ClassNotFoundException if the TDengine driver class cannot be loaded
+     */
+    private static Schema tdengineTest(SchemaPlus rootSchema) throws ClassNotFoundException {
+        Class.forName("com.taosdata.jdbc.TSDBDriver");
+        BasicDataSource dataSource = new BasicDataSource();
+        dataSource.setUrl(url_taos);
+        dataSource.setUsername("root");
+        dataSource.setPassword("taosdata");
+
+        return JdbcSchema.create(rootSchema, "test", dataSource, "hdb", null);
+    }
+
+    /**
+     * Builds a Calcite schema on top of the MySQL JDBC driver.
+     *
+     * @param rootSchema parent schema the new JDBC schema is created under
+     * @return a JdbcSchema named "test" that targets the "test" catalog
+     * @throws ClassNotFoundException if the MySQL driver class cannot be loaded
+     */
+    private static Schema mysqlTest(SchemaPlus rootSchema) throws ClassNotFoundException {
+        Class.forName("com.mysql.jdbc.Driver");
+        BasicDataSource dataSource = new BasicDataSource();
+        dataSource.setUrl(url_mysql);
+        dataSource.setUsername("root");
+        dataSource.setPassword("123456");
+
+        return JdbcSchema.create(rootSchema, "test", dataSource, "test", null);
+    }
+}
--- a/tests/examples/JDBC/calciteDemo/src/main/resources/log4j.properties
+++ b/tests/examples/JDBC/calciteDemo/src/main/resources/log4j.properties
+# Root logger: level "info", routed to the appender named "stdout" defined below.
+log4j.rootLogger=info,stdout
+
+# Console appender "stdout" with a pattern layout.
+# Pattern pieces: %d timestamp, %p level, %l caller location, %m message, %n line break.
+# Note that %l and %m are written back to back, with no separator between them.
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender 
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 
+log4j.appender.stdout.layout.ConversionPattern= [%d{yyyy-MM-dd HH:mm:ss a}]:%p %l%m%n
\ No newline at end of file
--- a/tests/pytest/concurrent_inquiry.py
+++ b/tests/pytest/concurrent_inquiry.py
+###################################################################
+#           Copyright (c) 2016 by TAOS Technologies, Inc.
+#                     All rights reserved.
+#
+#  This file is proprietary and confidential to TAOS Technologies.
+#  No part of this file may be reproduced, stored, transmitted,
+#  disclosed or used in any form or by any means other than as
+#  expressly provided by the written permission from Jianhui Tao
+#
+###################################################################
+
+# -*- coding: utf-8 -*-
+import threading
+import taos
+
+import json
+import time
+import random
+# Statements issued by the worker threads below, one list entry per statement.
+# Every entry must end with a comma: adjacent string literals are concatenated by
+# Python, so a missing comma silently merges two statements into one invalid query.
+query_sql = [
+# first supertable
+"select count(*) from test.meters where c1 > 50;",
+"select count(*) from test.meters where c2 >= 50 and c2 < 100;",
+"select count(*) from test.meters where c3 != 5;",
+"select count(*) from test.meters where t3 > 2;",
+"select count(*) from test.meters where ts <> '2020-05-13 10:00:00.002';",
+"select count(*) from test.meters where t7 like 'fi%';",
+"select count(*) from test.meters where t7 like '_econd';",
+"select count(*) from test.meters interval(1n) order by ts desc;",
+"select first(*)  from test.meters;",
+"select last(*)  from test.meters;",
+"select last_row(*)  from test.meters;",
+"select twa(c1) from test.t1 where ts > 1500000001000  and ts < 1500000101000",
+"select avg(c1) from test.meters;",
+"select bottom(c1, 2) from test.t1;",
+"select diff(c1) from test.t1;",
+"select leastsquares(c1, 1, 1) from test.t1  ;",
+"select max(c1) from test.meters;",
+"select min(c1) from test.meters;",
+"select c1 + c2 * c3 + c1 / c5 + c4 + c2 from test.t1;",
+"select percentile(c1, 50) from test.t1;",
+"select spread(c1) from test.t1  ;",
+"select stddev(c1) from test.t1;",
+"select sum(c1) from test.meters;",
+"select top(c1, 2) from test.meters;",
+"select twa(c6) from test.t1 where ts > 1500000001000  and ts < 1500000101000",
+"select avg(c6) from test.meters;",
+"select bottom(c6, 2) from test.t1;",
+"select diff(c6) from test.t1;",
+"select leastsquares(c6, 1, 1) from test.t1  ;",
+"select max(c6) from test.meters;",
+"select min(c6) from test.meters;",
+"select c6 + c2 * c3 + c6 / c5 + c4 + c2 from test.t1;",
+"select percentile(c6, 50) from test.t1;",
+"select spread(c6) from test.t1  ;",
+"select stddev(c6) from test.t1;",
+"select sum(c6) from test.meters;",
+"select top(c6, 2) from test.meters;",
+# second supertable
+"select count(*) from test.meters1 where c1 > 50;",
+"select count(*) from test.meters1 where c2 >= 50 and c2 < 100;",
+"select count(*) from test.meters1 where c3 != 5;",
+"select count(*) from test.meters1 where t3 > 2;",
+"select count(*) from test.meters1 where ts <> '2020-05-13 10:00:00.002';",
+"select count(*) from test.meters1 where t7 like 'fi%';",
+"select count(*) from test.meters1 where t7 like '_econd';",
+"select count(*) from test.meters1 interval(1n) order by ts desc;",
+"select first(*)  from test.meters1;",
+"select last(*)  from test.meters1;",
+"select last_row(*)  from test.meters1;",
+"select twa(c1) from test.m1 where ts > 1500000001000  and ts < 1500000101000",
+"select avg(c1) from test.meters1;",
+"select bottom(c1, 2) from test.m1;",
+"select diff(c1) from test.m1;",
+"select leastsquares(c1, 1, 1) from test.m1  ;",
+"select max(c1) from test.meters1;",
+"select min(c1) from test.meters1;",
+"select c1 + c2 * c3 + c1 / c5 + c3 + c2 from test.m1;",
+"select percentile(c1, 50) from test.m1;",
+"select spread(c1) from test.m1  ;",
+"select stddev(c1) from test.m1;",
+"select sum(c1) from test.meters1;",
+"select top(c1, 2) from test.meters1;",
+"select twa(c6) from test.m1 where ts > 1500000001000  and ts < 1500000101000",
+"select avg(c6) from test.meters1;",
+"select bottom(c6, 2) from test.m1;",
+"select diff(c6) from test.m1;",
+"select leastsquares(c6, 1, 1) from test.m1  ;",
+"select max(c6) from test.meters1;",
+"select min(c6) from test.meters1;",
+"select c6 + c2 * c3 + c6 / c5 + c3 + c2 from test.m1;",
+"select percentile(c6, 50) from test.m1;",
+"select spread(c6) from test.m1  ;",
+"select stddev(c6) from test.m1;",
+"select sum(c6) from test.meters1;",
+"select top(c6, 2) from test.meters1;",
+]
+
+class ConcurrentInquiry:
+    """Stress driver that runs the module-level ``query_sql`` statements from many threads.
+
+    Call ``initConnection()`` to set the defaults, then ``run()`` to start the workers.
+    """
+
+    def initConnection(self):
+        """Set the worker count and the ``ts`` value stored on this driver."""
+        self.numOfTherads = 50  # attribute name (sic) kept unchanged for existing users
+        self.ts = 1500000001000  # not read anywhere else in this class
+
+    def query_thread(self, threadID):
+        """Worker body: connect once, then run every query in random order, forever.
+
+        Args:
+            threadID: number used only to label this worker's console output.
+
+        A failing statement is reported on stdout and the worker carries on with
+        the next one; the loop never terminates on its own.
+        """
+        host = "10.211.55.14"
+        user = "root"
+        password = "taosdata"
+        # Keyword arguments state explicitly which value is the host, user and password.
+        conn = taos.connect(host=host, user=user, password=password)
+        cl = conn.cursor()
+
+        print("Thread %d: starting" % threadID)
+
+        while True:
+            # Shuffle a private copy: query_sql is shared by every worker, and
+            # shuffling it in place would reorder it under threads that are
+            # iterating over it at the same moment.
+            ran_query_sql = list(query_sql)
+            random.shuffle(ran_query_sql)
+            for i in ran_query_sql:
+                print("Thread %d : %s"% (threadID,i))
+                try:
+                    cl.execute(i)
+                    cl.fetchall()  # must be called; the bare attribute fetched nothing
+                except Exception as e:
+                    print(
+                "Failure thread%d, sql: %s,exception: %s" %
+                (threadID, str(i),str(e)))
+
+            # Printed after each full pass over the list; the worker keeps looping.
+            print("Thread %d: finishing" % threadID)
+
+    def run(self):
+        """Start the worker threads (one per configured slot) without joining them."""
+        # Fall back to 50, the previous hard-coded count, if initConnection() was skipped.
+        numThreads = getattr(self, "numOfTherads", 50)
+        threads = []
+        for i in range(numThreads):
+            thread = threading.Thread(target=self.query_thread, args=(i,))
+            threads.append(thread)
+            thread.start()
+        
+# Script entry: build the driver, set its defaults, then start the worker threads.
+# These statements run at module level, so they also run when the file is imported.
+q = ConcurrentInquiry()
+q.initConnection()
+q.run()
--- a/tests/pytest/crash_gen.sh
+++ b/tests/pytest/crash_gen.sh
@@ -42,11 +42,37 @@ TAOSD_DIR=`find $TAOS_DIR -name "taosd"|grep bin|head -n1`

 LIB_DIR=`echo $TAOSD_DIR|rev|cut -d '/' -f 3,4,5,6|rev`/lib

+# Now getting ready to execute Python
+# The following is the default of our standard dev env (Ubuntu 20.04), modify/adjust at your own risk
+PYTHON_EXEC=python3.8
+
 # First we need to set up a path for Python to find our own TAOS modules, so that "import" can work.
-export PYTHONPATH=$(pwd)/../../src/connector/python/linux/python3
+export PYTHONPATH=$(pwd)/../../src/connector/python/linux/python3:$(pwd)

 # Then let us set up the library path so that our compiled SO file can be loaded by Python
 export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$LIB_DIR

 # Now we are all let, and let's see if we can find a crash. Note we pass all params
-python3.8 ./crash_gen.py $@
+if [[ $1 == '--valgrind' ]]; then  # memcheck mode: stdout/stderr are captured to files
+  shift  # drop the mode flag so "$@" holds only the crash_gen.py arguments
+  export PYTHONMALLOC=malloc  # have Python allocate through malloc so valgrind sees it
+  VALGRIND_OUT=valgrind.out
+  VALGRIND_ERR=valgrind.err
+  # How to generate valgrind suppression file: https://stackoverflow.com/questions/17159578/generating-suppressions-for-memory-leaks
+  # valgrind --leak-check=full --gen-suppressions=all --log-fd=9 python3.8 ./crash_gen.py $@ 9>>memcheck.log
+  echo "Executing under VALGRIND, with STDOUT/ERR going to $VALGRIND_OUT and $VALGRIND_ERR, please watch them from a different terminal."
+  valgrind  \
+    --leak-check=yes \
+    --suppressions=crash_gen/valgrind_taos.supp \
+    "$PYTHON_EXEC" \
+    ./crash_gen/crash_gen.py "$@" > "$VALGRIND_OUT" 2> "$VALGRIND_ERR"
+elif [[ $1 == '--helgrind' ]]; then  # helgrind mode: output stays on the terminal
+  shift  # drop the mode flag, as above
+  valgrind  \
+    --tool=helgrind \
+    "$PYTHON_EXEC" \
+    ./crash_gen/crash_gen.py "$@"
+else  # plain run; "$@" is quoted so arguments with spaces or globs pass through intact
+  "$PYTHON_EXEC" ./crash_gen/crash_gen.py "$@"
+fi
+
--- a/tests/pytest/crash_gen.py
+++ b/tests/pytest/crash_gen.py
@@ -15,7 +15,6 @@
 # https://stackoverflow.com/questions/33533148/how-do-i-specify-that-the-return-type-of-a-method-is-the-same-as-the-class-itsel
 from __future__ import annotations
 import taos
-import crash_gen
 from util.sql import *
 from util.cases import *
 from util.dnodes import *
@@ -42,6 +41,9 @@ import os
 import io
 import signal
 import traceback
+import resource
+from guppy import hpy
+import gc

 try:
    import psutil
@@ -53,14 +55,13 @@ except:
 if sys.version_info[0] < 3:
    raise Exception("Must be using Python 3")

-
 # Global variables, tried to keep a small number.

 # Command-line/Environment Configurations, will set a bit later
 # ConfigNameSpace = argparse.Namespace
 gConfig = argparse.Namespace()  # Dummy value, will be replaced later
 gSvcMgr = None # TODO: refactor this hack, use dep injection
-logger = None
+logger = None # type: Logger

 def runThread(wt: WorkerThread):
    wt.run()
@@ -101,7 +102,7 @@ class WorkerThread:
            else:
                raise RuntimeError("Unexpected connector type: {}".format(gConfig.connector_type))

-        self._dbInUse = False  # if "use db" was executed already
+        # self._dbInUse = False  # if "use db" was executed already

    def logDebug(self, msg):
        logger.debug("    TRD[{}] {}".format(self._tid, msg))
@@ -109,13 +110,13 @@ class WorkerThread:
    def logInfo(self, msg):
        logger.info("    TRD[{}] {}".format(self._tid, msg))

-    def dbInUse(self):
-        return self._dbInUse
+    # def dbInUse(self):
+    #     return self._dbInUse

-    def useDb(self):
-        if (not self._dbInUse):
-            self.execSql("use db")
-        self._dbInUse = True
+    # def useDb(self):
+    #     if (not self._dbInUse):
+    #         self.execSql("use db")
+    #     self._dbInUse = True

    def getTaskExecutor(self):
        return self._tc.getTaskExecutor()
@@ -161,12 +162,12 @@ class WorkerThread:
                logger.debug("[TRD] Thread Coordinator not running any more, worker thread now stopping...")
                break

-            # Before we fetch the task and run it, let's ensure we properly "use" the database
+            # Before we fetch the task and run it, let's ensure we properly "use" the database (not needed any more)
            try:
                if (gConfig.per_thread_db_connection):  # most likely TRUE
                    if not self._dbConn.isOpen:  # might have been closed during server auto-restart
                        self._dbConn.open()
-                self.useDb() # might encounter exceptions. TODO: catch
+                # self.useDb() # might encounter exceptions. TODO: catch
            except taos.error.ProgrammingError as err:
                errno = Helper.convertErrno(err.errno)
                if errno in [0x383, 0x386, 0x00B, 0x014]  : # invalid database, dropping, Unable to establish connection, Database not ready
@@ -181,14 +182,13 @@ class WorkerThread:
            task = tc.fetchTask()

            # Execute such a task
-            logger.debug(
-                "[TRD] Worker thread [{}] about to execute task: {}".format(
+            logger.debug("[TRD] Worker thread [{}] about to execute task: {}".format(
                    self._tid, task.__class__.__name__))
            task.execute(self)
            tc.saveExecutedTask(task)
            logger.debug("[TRD] Worker thread [{}] finished executing task".format(self._tid))

-            self._dbInUse = False  # there may be changes between steps
+            # self._dbInUse = False  # there may be changes between steps
        # print("_wtd", end=None) # worker thread died

    def verifyThreadSelf(self):  # ensure we are called by this own thread
@@ -237,7 +237,7 @@ class WorkerThread:
    def getQueryResult(self):
        return self.getDbConn().getQueryResult()

-    def getDbConn(self):
+    def getDbConn(self) -> DbConn :
        if (gConfig.per_thread_db_connection):
            return self._dbConn
        else:
@@ -255,7 +255,7 @@ class WorkerThread:
 class ThreadCoordinator:
    WORKER_THREAD_TIMEOUT = 60 # one minute

-    def __init__(self, pool: ThreadPool, dbManager):
+    def __init__(self, pool: ThreadPool, dbManager: DbManager):
        self._curStep = -1  # first step is 0
        self._pool = pool
        # self._wd = wd
@@ -268,6 +268,7 @@ class ThreadCoordinator:
            self._pool.numThreads + 1)  # one barrier for all threads
        self._execStats = ExecutionStats()
        self._runStatus = MainExec.STATUS_RUNNING
+        self._initDbs()

    def getTaskExecutor(self):
        return self._te
@@ -318,7 +319,7 @@ class ThreadCoordinator:
        logger.debug("[TRD] Main thread waking up at step {}, tapping worker threads".format(
                self._curStep))  # Now not all threads had time to go to sleep
        # Worker threads will wake up at this point, and each execute it's own task
-        self.tapAllThreads() # release all worker thread from their "gate"
+        self.tapAllThreads() # release all worker thread from their "gates"

    def _syncAtBarrier(self):
         # Now main thread (that's us) is ready to enter a step
@@ -332,12 +333,16 @@ class ThreadCoordinator:
    def _doTransition(self):
        transitionFailed = False
        try:
-            sm = self._dbManager.getStateMachine()
-            logger.debug("[STT] starting transitions")
+            for x in self._dbs:
+                db = x # type: Database
+                sm = db.getStateMachine()
+                logger.debug("[STT] starting transitions for DB: {}".format(db.getName()))
                # at end of step, transiton the DB state
-            sm.transition(self._executedTasks)
-            logger.debug("[STT] transition ended")
-            # Due to limitation (or maybe not) of the Python library,
+                tasksForDb = db.filterTasks(self._executedTasks)
+                sm.transition(tasksForDb, self.getDbManager().getDbConn())
+                logger.debug("[STT] transition ended for DB: {}".format(db.getName()))
+
+            # Due to limitation (or maybe not) of the TD Python library,
            # we cannot share connections across threads
            # Here we are in main thread, we cannot operate the connections created in workers
            # Moving below to task loop
@@ -347,6 +352,7 @@ class ThreadCoordinator:
            #         t.useDb()
                    # t.execSql("use db") # main thread executing "use
                    # db" on behalf of every worker thread
+
        except taos.error.ProgrammingError as err:
            if (err.msg == 'network unavailable'):  # broken DB connection
                logger.info("DB connection broken, execution failed")
@@ -358,7 +364,7 @@ class ThreadCoordinator:
                # end, and maybe signal them to stop
            else:
                raise
-        return transitionFailed
+        # return transitionFailed # Why did we have this??!!

        self.resetExecutedTasks()  # clear the tasks after we are done
        # Get ready for next step
@@ -378,6 +384,14 @@ class ThreadCoordinator:
        while not self._runShouldEnd(transitionFailed, hasAbortedTask, workerTimeout):
            if not gConfig.debug: # print this only if we are not in debug mode                
                print(".", end="", flush=True)
+            # if (self._curStep % 2) == 0: # print memory usage once every 10 steps
+            #     memUsage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
+            #     print("[m:{}]".format(memUsage), end="", flush=True) # print memory usage
+            # if (self._curStep % 10) == 3: 
+            #     h = hpy()
+            #     print("\n")        
+            #     print(h.heap())
+            
                        
            try:
                self._syncAtBarrier() # For now just cross the barrier
@@ -407,6 +421,7 @@ class ThreadCoordinator:
                errno2 = Helper.convertErrno(err.errno)  # correct error scheme
                errMsg = "Transition failed: errno=0x{:X}, msg: {}".format(errno2, err)
                logger.info(errMsg)
+                traceback.print_exc()
                self._execStats.registerFailure(errMsg)

            # Then we move on to the next step
@@ -430,6 +445,19 @@ class ThreadCoordinator:
        logger.info("\nAll worker threads finished")
        self._execStats.endExec()

+    def cleanup(self): # free resources: drop every reference held by this coordinator
+        self._pool.cleanup() # call the pool's own cleanup() before dropping the reference to it
+        # NOTE(review): self._dbs (filled by _initDbs) is not reset below - confirm that is intended
+        self._pool = None
+        self._te = None  # isRunning() tests this field, so it returns False from now on
+        self._dbManager = None
+        self._executedTasks = None
+        self._lock = None
+        self._stepBarrier = None
+        self._execStats = None
+        self._runStatus = None
+
+
    def printStats(self):
        self._execStats.printStats()

@@ -458,23 +486,34 @@ class ThreadCoordinator:
    def isRunning(self):
        return self._te is not None

+    def _initDbs(self):
+        ''' Fill self._dbs with Database objects that share one connection, invoked at __init__() time '''
+        self._dbs = [] # type: List[Database]
+        dbc = self.getDbManager().getDbConn() # the same connection object is handed to every Database
+        if gConfig.max_dbs == 0: # 0 still yields exactly one Database, with index 0
+            self._dbs.append(Database(0, dbc))
+        else:
+            for i in range(gConfig.max_dbs): # indices 0 .. max_dbs-1
+                self._dbs.append(Database(i, dbc))
+
+    def pickDatabase(self):
+        ''' Return one entry of self._dbs: index 0 when gConfig.max_dbs is 0, else a thrown index '''
+        # Per the original note, Dice.throw(N) is expected to yield 0 to N-1
+        dbIndex = Dice.throw(gConfig.max_dbs) if gConfig.max_dbs != 0 else 0
+        chosenDb = self._dbs[dbIndex] # type: Database
+        return chosenDb
+
    def fetchTask(self) -> Task:
+        ''' The thread coordinator (that's us) is responsible for fetching a task
+            to be executed next.
+        '''
        if (not self.isRunning()):  # no task
            raise RuntimeError("Cannot fetch task when not running")
-        # return self._wd.pickTask()
-        # Alternatively, let's ask the DbState for the appropriate task
-        # dbState = self.getDbState()
-        # tasks = dbState.getTasksAtState() # TODO: create every time?
-        # nTasks = len(tasks)
-        # i = Dice.throw(nTasks)
-        # logger.debug(" (dice:{}/{}) ".format(i, nTasks))
-        # # return copy.copy(tasks[i]) # Needs a fresh copy, to save execution results, etc.
-        # return tasks[i].clone() # TODO: still necessary?
+
        # pick a task type for current state
-        taskType = self.getDbManager().getStateMachine().pickTaskType()
-        return taskType(
-            self.getDbManager(),
-            self._execStats)  # create a task from it
+        db = self.pickDatabase()
+        taskType = db.getStateMachine().pickTaskType() # type: Task
+        return taskType(self._execStats, db)  # create a task from it

    def resetExecutedTasks(self):
        self._executedTasks = []  # should be under single thread
@@ -510,6 +549,9 @@ class ThreadPool:
            logger.debug("Joining thread...")
            workerThread._thread.join()

+    def cleanup(self): # drop the pool's reference to its thread list
+        self.threadList = None # maybe clean up each?
+
 # A queue of continguous POSITIVE integers, used by DbManager to generate continuous numbers
 # for new table names

@@ -632,17 +674,6 @@ class DbConn:
        logger.debug("[DB] data connection opened, type = {}".format(self._type))
        self.isOpen = True

-    def resetDb(self):  # reset the whole database, etc.
-        if (not self.isOpen):
-            raise RuntimeError("Cannot reset database until connection is open")
-        # self._tdSql.prepare() # Recreate database, etc.
-
-        self.execute('drop database if exists db')
-        logger.debug("Resetting DB, dropped database")
-        # self._cursor.execute('create database db')
-        # self._cursor.execute('use db')
-        # tdSql.execute('show databases')
-
    def queryScalar(self, sql) -> int:
        return self._queryAny(sql)

@@ -654,7 +685,10 @@ class DbConn:
            raise RuntimeError("Cannot query database until connection is open")
        nRows = self.query(sql)
        if nRows != 1:
-            raise RuntimeError("Unexpected result for query: {}, rows = {}".format(sql, nRows))
+            raise taos.error.ProgrammingError(
+                "Unexpected result for query: {}, rows = {}".format(sql, nRows), 
+                (0x991 if nRows==0 else 0x992)
+            )
        if self.getResultRows() != 1 or self.getResultCols() != 1:
            raise RuntimeError("Unexpected result set for query: {}".format(sql))
        return self.getQueryResult()[0][0]
@@ -662,16 +696,32 @@ class DbConn:
    def use(self, dbName):
        self.execute("use {}".format(dbName))

-    def hasDatabases(self):
-        return self.query("show databases") > 1 # We now have a "log" database by default
+    def existsDatabase(self, dbName: str):
+        '''Tell whether "show databases" lists a database called dbName.
+
+        Only the first column of each result row is compared against dbName.
+        '''
+        self.query("show databases")
+        knownNames = []
+        for row in self.getQueryResult():
+            knownNames.append(row[0])
+        # TODO: super weird type mangling seen, once here
+        found = dbName in knownNames
+        return found

    def hasTables(self):
        return self.query("show tables") > 0

    def execute(self, sql):
+        ''' Return the number of rows affected'''
        raise RuntimeError("Unexpected execution, should be overriden")

+    def safeExecute(self, sql):
+        '''Execute sql and report the outcome as a boolean instead of raising.
+
+        Returns True when execute() completes (its row count is discarded) and
+        False when it raises taos.error.ProgrammingError. Any other exception
+        type is not caught here and propagates to the caller.
+        '''
+        try:
+            self.execute(sql)
+        except taos.error.ProgrammingError:
+            return False # failed, for whatever TAOS reason
+        return True # ignore num of results, return success
+
    def query(self, sql) -> int: # return num rows returned
+        ''' Return the number of rows returned by the query; this base version always raises and must be overridden'''
        raise RuntimeError("Unexpected execution, should be overriden")

    def openByType(self):
@@ -766,6 +816,13 @@ class DbConnRest(DbConn):


 class MyTDSql:
+    # Class variables
+    _clsLock = threading.Lock() # class wide locking
+    longestQuery = None # type: str
+    longestQueryTime = 0.0 # seconds
+    lqStartTime = 0.0
+    # lqEndTime = 0.0 # Not needed, as we have the two above already
+
    def __init__(self, hostAddr, cfgPath):
        # Make the DB connection
        self._conn = taos.connect(host=hostAddr, config=cfgPath) 
@@ -782,13 +839,28 @@ class MyTDSql:
        #     self.cursor.log(caller.filename + ".sql")

    def close(self):
+        self._cursor.close() # closed here ahead of the connection, and once more below; TODO: confirm a double close is safe
        self._conn.close() # TODO: very important, cursor close does NOT close DB connection!
        self._cursor.close()

+    def _execInternal(self, sql):
+        '''Execute sql on the cursor and keep the class-wide slowest-query record up to date.
+
+        Returns whatever self._cursor.execute(sql) returns. The record
+        (longestQuery, longestQueryTime, lqStartTime) lives on the class and is
+        only replaced by a statement that ran more than 0.01 seconds longer.
+        '''
+        startTime = time.time()
+        ret = self._cursor.execute(sql)
+        queryTime = time.time() - startTime
+        cls = self.__class__
+        # Unlocked pre-check keeps the common (not a record) path lock-free
+        if queryTime > (cls.longestQueryTime + 0.01):
+            with cls._clsLock:
+                # Decide again under the lock: another thread may have stored a
+                # longer time since the pre-check, and must not be overwritten
+                if queryTime > (cls.longestQueryTime + 0.01):
+                    cls.longestQuery = sql
+                    cls.longestQueryTime = queryTime
+                    cls.lqStartTime = startTime
+        return ret
+
    def query(self, sql):
        self.sql = sql
        try:
-            self._cursor.execute(sql)
+            self._execInternal(sql)
            self.queryResult = self._cursor.fetchall()
            self.queryRows = len(self.queryResult)
            self.queryCols = len(self._cursor.description)
@@ -802,7 +874,7 @@ class MyTDSql:
    def execute(self, sql):
        self.sql = sql
        try:
-            self.affectedRows = self._cursor.execute(sql)
+            self.affectedRows = self._execInternal(sql)
        except Exception as e:
            # caller = inspect.getframeinfo(inspect.stack()[1][0])
            # args = (caller.filename, caller.lineno, sql, repr(e))
@@ -922,6 +994,8 @@ class AnyState:

    STATE_VAL_IDX = 0
    CAN_CREATE_DB = 1
+    # For below, if we can "drop the DB", but strictly speaking 
+    # only "under normal circumstances", as we may override it with the -b option
    CAN_DROP_DB = 2  
    CAN_CREATE_FIXED_SUPER_TABLE = 3
    CAN_DROP_FIXED_SUPER_TABLE = 4
@@ -935,6 +1009,8 @@ class AnyState:
        # -1 hack to accommodate the STATE_INVALID case
        return self._stateNames[self._info[self.STATE_VAL_IDX] + 1]

+    # Each sub state tells us the "info", about itself, so we can determine
+    # on things like canDropDB()
    def getInfo(self):
        raise RuntimeError("Must be overriden by child classes")

@@ -961,6 +1037,10 @@ class AnyState:
        return self._info[self.CAN_CREATE_DB]

    def canDropDb(self):
+        # A positive gConfig.max_dbs overrides this state's own CAN_DROP_DB flag:
+        # dropping is then refused no matter what the state's info says
+        if gConfig.max_dbs > 0 : 
+            return False
        return self._info[self.CAN_DROP_DB]

    def canCreateFixedSuperTable(self):
@@ -997,8 +1077,8 @@ class AnyState:
            if task.isSuccess():
                sCnt += 1
        if (exists and sCnt <= 0):
-            raise RuntimeError(
-                "Unexpected zero success for task: {}".format(cls))
+            raise RuntimeError("Unexpected zero success for task type: {}, from tasks: {}"
+                .format(cls, tasks))

    def assertNoTask(self, tasks, cls):
        for task in tasks:
@@ -1145,13 +1225,16 @@ class StateHasData(AnyState):


 class StateMechine:
-    def __init__(self, dbConn):
-        self._dbConn = dbConn
-        self._curState = self._findCurrentState()  # starting state
-        # transitition target probabilities, indexed with value of STATE_EMPTY,
-        # STATE_DB_ONLY, etc.
+    def __init__(self, db: Database): 
+        self._db = db
+        # transition target probabilities, indexed with value of STATE_EMPTY, STATE_DB_ONLY, etc.
        self._stateWeights = [1, 2, 10, 40]

+    def init(self, dbc: DbConn): # late initialization: dbc is used for this lookup only and is not stored
+        self._curState = self._findCurrentState(dbc)  # starting state, as detected through dbc
+        logger.debug("Found Starting State: {}".format(self._curState))
+
+    # TODO: seems no longer used, remove?
    def getCurrentState(self):
        return self._curState

@@ -1193,34 +1276,35 @@ class StateMechine:
                typesToStrings(taskTypes)))
        return taskTypes

-    def _findCurrentState(self):
-        dbc = self._dbConn
+    def _findCurrentState(self, dbc: DbConn):
        ts = time.time()  # we use this to debug how fast/slow it is to do the various queries to find the current DB state
-        if not dbc.hasDatabases():  # no database?!
+        dbName =self._db.getName()
+        if not dbc.existsDatabase(dbName): # dbc.hasDatabases():  # no database?!
            logger.debug( "[STT] empty database found, between {} and {}".format(ts, time.time()))
            return StateEmpty()
        # did not do this when openning connection, and this is NOT the worker
        # thread, which does this on their own
-        dbc.use("db")
+        dbc.use(dbName)
        if not dbc.hasTables():  # no tables
            logger.debug("[STT] DB_ONLY found, between {} and {}".format(ts, time.time()))
            return StateDbOnly()

-        sTable = DbManager.getFixedSuperTable()
-        if sTable.hasRegTables(dbc):  # no regular tables
+        sTable = self._db.getFixedSuperTable()
+        if sTable.hasRegTables(dbc, dbName):  # no regular tables
            logger.debug("[STT] SUPER_TABLE_ONLY found, between {} and {}".format(ts, time.time()))
            return StateSuperTableOnly()
        else:  # has actual tables
            logger.debug("[STT] HAS_DATA found, between {} and {}".format(ts, time.time()))
            return StateHasData()

-    def transition(self, tasks):
+    # We transition the system to a new state by examining the current state itself
+    def transition(self, tasks, dbc: DbConn):
        if (len(tasks) == 0):  # before 1st step, or otherwise empty
            logger.debug("[STT] Starting State: {}".format(self._curState))
            return  # do nothing

        # this should show up in the server log, separating steps
-        self._dbConn.execute("show dnodes")
+        dbc.execute("show dnodes")

        # Generic Checks, first based on the start state
        if self._curState.canCreateDb():
@@ -1251,7 +1335,7 @@ class StateMechine:
        # if self._state.canReadData():
            # Nothing for sure

-        newState = self._findCurrentState()
+        newState = self._findCurrentState(dbc)
        logger.debug("[STT] New DB state determined: {}".format(newState))
        # can old state move to new state through the tasks?
        self._curState.verifyTasksToState(tasks, newState)
@@ -1283,49 +1367,53 @@ class StateMechine:
            if rnd < 0:
                return i

-# Manager of the Database Data/Connection
+class Database:
+    ''' We use this to represent an actual TDengine database inside a service instance,
+        possibly in a cluster environment.

+        For now we use it to manage state transitions in that database
+    '''
+    _clsLock = threading.Lock() # class wide lock
+    _lastInt = 101  # next one is initial integer
+    _lastTick = 0
+    _lastLaggingTick = 0 # lagging tick, for unsequenced insersions
+
+    def __init__(self, dbNum: int, dbc: DbConn): # TODO: remove dbc
+        self._dbNum = dbNum # we assign a number to databases, for our testing purpose
+        self._stateMachine = StateMechine(self)
+        self._stateMachine.init(dbc)
          
-class DbManager():
-    def __init__(self, resetDb=True):
-        self.tableNumQueue = LinearQueue()
-        # datetime.datetime(2019, 1, 1) # initial date time tick
-        self._lastTick = self.setupLastTick()
-        self._lastInt = 0  # next one is initial integer
        self._lock = threading.RLock()

-        # self.openDbServerConnection()
-        self._dbConn = DbConn.createNative() if (
-            gConfig.connector_type == 'native') else DbConn.createRest()
-        try:
-            self._dbConn.open()  # may throw taos.error.ProgrammingError: disconnected
-        except taos.error.ProgrammingError as err:
-            # print("Error type: {}, msg: {}, value: {}".format(type(err), err.msg, err))
-            if (err.msg == 'client disconnected'):  # cannot open DB connection
-                print(
-                    "Cannot establish DB connection, please re-run script without parameter, and follow the instructions.")
-                sys.exit(2)
-            else:
-                print("Failed to connect to DB, errno = {}, msg: {}".format(Helper.convertErrno(err.errno), err.msg))
-                raise
-        except BaseException:
-            print("[=] Unexpected exception")
-            raise
+    def getStateMachine(self) -> StateMechine:
+        return self._stateMachine

-        if resetDb:
-            self._dbConn.resetDb()  # drop and recreate DB
+    def getDbNum(self):
+        return self._dbNum

-        # Do this after dbConn is in proper shape
-        self._stateMachine = StateMechine(self._dbConn)
+    def getName(self):
+        return "db_{}".format(self._dbNum)

-    def getDbConn(self):
-        return self._dbConn
+    def filterTasks(self, inTasks: List[Task]):
+        '''Return, in their original order, the tasks whose database is this one.
+
+        A task is kept when task.getDb().isSame(self) is true; the input list
+        itself is not modified.
+        '''
+        return [candidate for candidate in inTasks if candidate.getDb().isSame(self)]

-    def getStateMachine(self) -> StateMechine:
-        return self._stateMachine
+    def isSame(self, other):
+        return self._dbNum == other._dbNum
+
+    def exists(self, dbc: DbConn):
+        return dbc.existsDatabase(self.getName())

-    # def getState(self):
-    #     return self._stateMachine.getCurrentState()
+    @classmethod
+    def getFixedSuperTableName(cls):
+        return "fs_table"
+
+    @classmethod
+    def getFixedSuperTable(cls) -> TdSuperTable:
+        return TdSuperTable(cls.getFixedSuperTableName())

    # We aim to create a starting time tick, such that, whenever we run our test here once
    # We should be able to safely create 100,000 records, which will not have any repeated time stamp
@@ -1333,7 +1421,8 @@ class DbManager():
    # by a factor of 500.
    # TODO: what if it goes beyond 10 years into the future
    # TODO: fix the error as result of above: "tsdb timestamp is out of range"
-    def setupLastTick(self):
+    @classmethod
+    def setupLastTick(cls):
        t1 = datetime.datetime(2020, 6, 1)
        t2 = datetime.datetime.now()
        # maybe a very large number, takes 69 years to exceed Python int range
@@ -1347,33 +1436,22 @@ class DbManager():
        logger.info("Setting up TICKS to start from: {}".format(t4))
        return t4

-    def pickAndAllocateTable(self):  # pick any table, and "use" it
-        return self.tableNumQueue.pickAndAllocate()
-
-    def addTable(self):
-        with self._lock:
-            tIndex = self.tableNumQueue.push()
-        return tIndex
-
    @classmethod
-    def getFixedSuperTableName(cls):
-        return "fs_table"
-
-    @classmethod
-    def getFixedSuperTable(cls):
-        return TdSuperTable(cls.getFixedSuperTableName())
-
-    def releaseTable(self, i):  # return the table back, so others can use it
-        self.tableNumQueue.release(i)
-
-    def getNextTick(self):
-        with self._lock:  # prevent duplicate tick
-            if Dice.throw(20) == 0:  # 1 in 20 chance
-                return self._lastTick + datetime.timedelta(0, -100) # Go back in time 100 seconds
+    def getNextTick(cls):        
+        with cls._clsLock:  # prevent duplicate tick
+            if cls._lastLaggingTick==0:
+                # lagging series starts 10000 seconds behind; at a 1/20 draw rate that should be enough to avoid overlaps
+                cls._lastLaggingTick = cls.setupLastTick() + datetime.timedelta(0, -10000)                 
+            if cls._lastTick==0: # first call: start from the value setupLastTick() computes
+                cls._lastTick = cls.setupLastTick()  
+
+            if Dice.throw(20) == 0:  # 1 in 20 chance, return lagging tick
+                cls._lastLaggingTick += datetime.timedelta(0, 1) # advance the lagging tick by one second
+                return cls._lastLaggingTick 
            else:  # regular
                # add one second to it
-                self._lastTick += datetime.timedelta(0, 1)
-                return self._lastTick
+                cls._lastTick += datetime.timedelta(0, 1)
+                return cls._lastTick

    def getNextInt(self):
        with self._lock:
@@ -1389,6 +1467,55 @@ class DbManager():
        # print("Float obtained: {}".format(ret))
        return ret

+
+class DbManager():
+    ''' This is a wrapper around DbConn(), to make it easier to use. 
+
+        TODO: rename this to DbConnManager
+    '''
+    def __init__(self):
+        '''Create the DB connection selected by gConfig.connector_type and open it.
+
+        Exits the process with status 2 when opening fails with the message
+        'client disconnected'; any other failure to open is printed and re-raised.
+        '''
+        self.tableNumQueue = LinearQueue() # TODO: delete?
+        # addTable() still does "with self._lock", so the lock has to exist on
+        # this object even though the state machine moved to Database()
+        self._lock = threading.RLock()
+        self._dbConn = DbConn.createNative() if (
+            gConfig.connector_type == 'native') else DbConn.createRest()
+        try:
+            self._dbConn.open()  # may throw taos.error.ProgrammingError: disconnected
+        except taos.error.ProgrammingError as err:
+            if (err.msg == 'client disconnected'):  # cannot open DB connection
+                print(
+                    "Cannot establish DB connection, please re-run script without parameter, and follow the instructions.")
+                sys.exit(2)
+            else:
+                print("Failed to connect to DB, errno = {}, msg: {}"
+                    .format(Helper.convertErrno(err.errno), err.msg))
+                raise
+        except BaseException:
+            print("[=] Unexpected exception")
+            raise
+
+    def getDbConn(self):
+        return self._dbConn
+
+    # TODO: not used any more, to delete
+    def pickAndAllocateTable(self):  # pick any table, and "use" it
+        return self.tableNumQueue.pickAndAllocate()
+
+    # TODO: Not used any more, to delete
+    def addTable(self):
+        with self._lock:
+            tIndex = self.tableNumQueue.push()
+        return tIndex
+
+    # Not used any more, to delete
+    def releaseTable(self, i):  # return the table back, so others can use it
+        self.tableNumQueue.release(i)    
+
+    # TODO: not used any more, delete
    def getTableNameToDelete(self):
        tblNum = self.tableNumQueue.pop()  # TODO: race condition!
        if (not tblNum):  # maybe false
@@ -1399,7 +1526,6 @@ class DbManager():
    def cleanUp(self):
        self._dbConn.close()

-
 class TaskExecutor():
    class BoundedList:
        def __init__(self, size=10):
@@ -1465,6 +1591,10 @@ class TaskExecutor():


 class Task():
+    ''' A generic "Task" to be executed. For now we decide that there is no
+        need to embed a DB connection here, we use whatever the Worker Thread has
+        instead. But a task is always associated with a DB
+    '''
    taskSn = 100

    @classmethod
@@ -1473,10 +1603,9 @@ class Task():
        # logger.debug("Allocating taskSN: {}".format(Task.taskSn))
        return Task.taskSn

-    def __init__(self, dbManager: DbManager, execStats: ExecutionStats):
-        self._dbManager = dbManager
+    def __init__(self, execStats: ExecutionStats, db: Database):
        self._workerThread = None
-        self._err = None
+        self._err = None # type: Exception
        self._aborted = False
        self._curStep = None
        self._numRows = None  # Number of rows affected
@@ -1486,6 +1615,7 @@ class Task():
        # logger.debug("Creating new task {}...".format(self._taskNum))

        self._execStats = execStats
+        self._db = db # A task is always associated/for a specific DB

    def isSuccess(self):
        return self._err is None
@@ -1494,9 +1624,12 @@ class Task():
        return self._aborted

    def clone(self):  # TODO: why do we need this again?
-        newTask = self.__class__(self._dbManager, self._execStats)
+        newTask = self.__class__(self._execStats, self._db)
        return newTask

+    def getDb(self):
+        return self._db
+
    def logDebug(self, msg):
        self._workerThread.logDebug(
            "Step[{}.{}] {}".format(
@@ -1515,6 +1648,7 @@ class Task():
    def _isErrAcceptable(self, errno, msg):
        if errno in [
                0x05,  # TSDB_CODE_RPC_NOT_READY
+                0x0B,  # Unable to establish connection, more details in TD-1648
                # 0x200, # invalid SQL， TODO: re-examine with TD-934
                0x217, # "db not selected", client side defined error code
                0x218, # "Table does not exist" client side defined error code
@@ -1557,9 +1691,12 @@ class Task():
        self.logDebug(
            "[-] executing task {}...".format(self.__class__.__name__))

-        self._err = None
+        self._err = None # TODO: type hint mess up?
        self._execStats.beginTaskType(self.__class__.__name__)  # mark beginning
        errno2 = None
+
+        # Now pick a database, and stick with it for the duration of the task execution
+        dbName = self._db.getName()
        try:
            self._executeInternal(te, wt)  # TODO: no return value?
        except taos.error.ProgrammingError as err:
@@ -1597,7 +1734,7 @@ class Task():
            self._err = e
            self._aborted = True
            traceback.print_exc()
-        except BaseException:
+        except BaseException: # TODO: what is this again??!!
            self.logDebug(
                "[=] Unexpected exception, SQL: {}".format(
                    wt.getDbConn().getLastSql()))
@@ -1609,10 +1746,9 @@ class Task():
        # TODO: merge with above.
        self._execStats.incExecCount(self.__class__.__name__, self.isSuccess(), errno2)

-    def execSql(self, sql):
-        return self._dbManager.execute(sql)
-
+    # TODO: refactor away, just provide the dbConn
    def execWtSql(self, wt: WorkerThread, sql):  # execute an SQL on the worker thread
+        """ Run sql through the worker thread's execSql() and hand back whatever it returns """
        return wt.execSql(sql)

    def queryWtSql(self, wt: WorkerThread, sql):  # execute an SQL on the worker thread
@@ -1714,7 +1850,11 @@ class ExecutionStats:
            "| Total Elapsed Time (from wall clock): {:.3f} seconds".format(
                self._elapsedTime))
        logger.info("| Top numbers written: {}".format(TaskExecutor.getBoundedList()))
-        logger.info("| Total Number of Active DB Native Connections: {}".format(DbConnNative.totalConnections))
+        logger.info("| Active DB Native Connections (now): {}".format(DbConnNative.totalConnections))
+        logger.info("| Longest native query time: {:.3f} seconds, started: {}".
+            format(MyTDSql.longestQueryTime, 
+                time.strftime("%x %X", time.localtime(MyTDSql.lqStartTime))) )
+        logger.info("| Longest native query: {}".format(MyTDSql.longestQuery))
        logger.info(
            "----------------------------------------------------------------------")

@@ -1764,9 +1904,15 @@ class TaskCreateDb(StateTransitionTask):
    def canBeginFrom(cls, state: AnyState):
        return state.canCreateDb()

+    # Actually creating the database(es)
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
-        # self.execWtSql(wt, "create database db replica {}".format(Dice.throw(3)+1))
-        self.execWtSql(wt, "create database db")
+        # was: self.execWtSql(wt, "create database db")
+        repStr = ""
+        if gConfig.max_replicas != 1:
+            numReplica = Dice.throw(gConfig.max_replicas) + 1 # 1,2 ... N
+            repStr = "replica {}".format(numReplica)
+        self.execWtSql(wt, "create database {} {}"
+            .format(self._db.getName(), repStr) )

 class TaskDropDb(StateTransitionTask):
    @classmethod
@@ -1778,10 +1924,9 @@ class TaskDropDb(StateTransitionTask):
        return state.canDropDb()

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
-        self.execWtSql(wt, "drop database db")
+        self.execWtSql(wt, "drop database {}".format(self._db.getName()))
        logger.debug("[OPS] database dropped at {}".format(time.time()))

-
 class TaskCreateSuperTable(StateTransitionTask):
    @classmethod
    def getEndState(cls):
@@ -1792,13 +1937,14 @@ class TaskCreateSuperTable(StateTransitionTask):
        return state.canCreateFixedSuperTable()

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
-        if not wt.dbInUse():  # no DB yet, to the best of our knowledge
+        if not self._db.exists(wt.getDbConn()):
            logger.debug("Skipping task, no DB yet")
            return

-        sTable = self._dbManager.getFixedSuperTable()
+        sTable = self._db.getFixedSuperTable() # type: TdSuperTable
        # wt.execSql("use db")    # should always be in place
-        sTable.create(wt.getDbConn(), {'ts':'timestamp', 'speed':'int'}, {'b':'binary(200)', 'f':'float'})
+        sTable.create(wt.getDbConn(), self._db.getName(), 
+            {'ts':'timestamp', 'speed':'int'}, {'b':'binary(200)', 'f':'float'})
        # self.execWtSql(wt,"create table db.{} (ts timestamp, speed int) tags (b binary(200), f float) ".format(tblName))
        # No need to create the regular tables, INSERT will do that
        # automatically
@@ -1811,17 +1957,20 @@ class TdSuperTable:
    def getName(self):
        return self._stName

-    def create(self, dbc, cols: dict, tags: dict):
-        sql = "CREATE TABLE db.{} ({}) TAGS ({})".format(
+    # TODO: odd semantic, create() method is usually static?
+    def create(self, dbc, dbName, cols: dict, tags: dict):
+        '''Creating a super table'''
+        sql = "CREATE TABLE {}.{} ({}) TAGS ({})".format(
+            dbName,
            self._stName,
            ",".join(['%s %s'%(k,v) for (k,v) in cols.items()]),
            ",".join(['%s %s'%(k,v) for (k,v) in tags.items()])
            )
        dbc.execute(sql)        

-    def getRegTables(self, dbc: DbConn):
+    def getRegTables(self, dbc: DbConn, dbName: str):
        try:
-            dbc.query("select TBNAME from db.{}".format(self._stName))  # TODO: analyze result set later            
+            dbc.query("select TBNAME from {}.{}".format(dbName, self._stName))  # TODO: analyze result set later            
        except taos.error.ProgrammingError as err:                    
            errno2 = Helper.convertErrno(err.errno) 
            logger.debug("[=] Failed to get tables from super table: errno=0x{:X}, msg: {}".format(errno2, err))
@@ -1830,20 +1979,20 @@ class TdSuperTable:
        qr = dbc.getQueryResult()
        return [v[0] for v in qr] # list transformation, ref: https://stackoverflow.com/questions/643823/python-list-transformation

-    def hasRegTables(self, dbc: DbConn):
-        return dbc.query("SELECT * FROM db.{}".format(self._stName)) > 0
+    def hasRegTables(self, dbc: DbConn, dbName: str):
+        return dbc.query("SELECT * FROM {}.{}".format(dbName, self._stName)) > 0

-    def ensureTable(self, dbc: DbConn, regTableName: str):
-        sql = "select tbname from db.{} where tbname in ('{}')".format(self._stName, regTableName)
+    def ensureTable(self, dbc: DbConn, dbName: str, regTableName: str):
+        sql = "select tbname from {}.{} where tbname in ('{}')".format(dbName, self._stName, regTableName)
        if dbc.query(sql) >= 1 : # reg table exists already
            return
-        sql = "CREATE TABLE {} USING {} tags ({})".format(
-            regTableName, self._stName, self._getTagStrForSql(dbc)
+        sql = "CREATE TABLE {}.{} USING {}.{} tags ({})".format(
+            dbName, regTableName, dbName, self._stName, self._getTagStrForSql(dbc, dbName)
        )
        dbc.execute(sql)

-    def _getTagStrForSql(self, dbc) :
-        tags = self._getTags(dbc)
+    def _getTagStrForSql(self, dbc, dbName: str) :
+        tags = self._getTags(dbc, dbName)
        tagStrs = []
        for tagName in tags: 
            tagType = tags[tagName]
@@ -1857,34 +2006,34 @@ class TdSuperTable:
                raise RuntimeError("Unexpected tag type: {}".format(tagType))
        return ", ".join(tagStrs)

-    def _getTags(self, dbc) -> dict:
-        dbc.query("DESCRIBE {}".format(self._stName))
+    def _getTags(self, dbc, dbName) -> dict:
+        dbc.query("DESCRIBE {}.{}".format(dbName, self._stName))
        stCols = dbc.getQueryResult()
        # print(stCols)
        ret = {row[0]:row[1] for row in stCols if row[3]=='TAG'} # name:type
        # print("Tags retrieved: {}".format(ret))
        return ret

-    def addTag(self, dbc, tagName, tagType):
-        if tagName in self._getTags(dbc): # already 
+    def addTag(self, dbc, dbName, tagName, tagType):
+        if tagName in self._getTags(dbc, dbName): # already 
            return
        # sTable.addTag("extraTag", "int")
-        sql = "alter table db.{} add tag {} {}".format(self._stName, tagName, tagType)
+        sql = "alter table {}.{} add tag {} {}".format(dbName, self._stName, tagName, tagType)
        dbc.execute(sql)

-    def dropTag(self, dbc, tagName):
-        if not tagName in self._getTags(dbc): # don't have this tag
+    def dropTag(self, dbc, dbName, tagName):
+        if not tagName in self._getTags(dbc, dbName): # don't have this tag
            return
-        sql = "alter table db.{} drop tag {}".format(self._stName, tagName)
+        sql = "alter table {}.{} drop tag {}".format(dbName, self._stName, tagName)
        dbc.execute(sql)

-    def changeTag(self, dbc, oldTag, newTag):
-        tags = self._getTags(dbc)
+    def changeTag(self, dbc, dbName, oldTag, newTag):
+        tags = self._getTags(dbc, dbName)
        if not oldTag in tags: # don't have this tag
            return
        if newTag in tags: # already have this tag
            return
-        sql = "alter table db.{} change tag {} {}".format(self._stName, oldTag, newTag)
+        sql = "alter table {}.{} change tag {} {}".format(dbName, self._stName, oldTag, newTag)
        dbc.execute(sql)

 class TaskReadData(StateTransitionTask):
@@ -1897,15 +2046,17 @@ class TaskReadData(StateTransitionTask):
        return state.canReadData()

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
-        sTable = self._dbManager.getFixedSuperTable()
+        sTable = self._db.getFixedSuperTable()

-        if random.randrange(
-                5) == 0:  # 1 in 5 chance, simulate a broken connection. TODO: break connection in all situations
+        # 1 in 5 chance, simulate a broken connection. 
+        if random.randrange(5) == 0:  # TODO: break connection in all situations
            wt.getDbConn().close()
            wt.getDbConn().open()
+            print("_r", end="", flush=True)
        
        dbc = wt.getDbConn()
-        for rTbName in sTable.getRegTables(dbc):  # regular tables
+        dbName = self._db.getName()
+        for rTbName in sTable.getRegTables(dbc, dbName):  # regular tables
            aggExpr = Dice.choice([
                '*',
                'count(*)',
@@ -1931,10 +2082,10 @@ class TaskReadData(StateTransitionTask):
            ])
            try:
                # Run the query against the regular table first
-                dbc.execute("select {} from db.{}".format(aggExpr, rTbName))
+                dbc.execute("select {} from {}.{}".format(aggExpr, dbName, rTbName))
                # Then run it against the super table
                if aggExpr not in ['stddev(speed)']: #TODO: STDDEV not valid for super tables?!
-                    dbc.execute("select {} from db.{}".format(aggExpr, sTable.getName()))
+                    dbc.execute("select {} from {}.{}".format(aggExpr, dbName, sTable.getName()))
            except taos.error.ProgrammingError as err:                    
                errno2 = Helper.convertErrno(err.errno)
                logger.debug("[=] Read Failure: errno=0x{:X}, msg: {}, SQL: {}".format(errno2, err, dbc.getLastSql()))
@@ -1950,27 +2101,25 @@ class TaskDropSuperTable(StateTransitionTask):
        return state.canDropFixedSuperTable()

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
-        # 1/2 chance, we'll drop the regular tables one by one, in a randomized
-        # sequence
+        # 1/2 chance, we'll drop the regular tables one by one, in a randomized sequence
        if Dice.throw(2) == 0:
+            # print("_7_", end="", flush=True)
            tblSeq = list(range(
                2 + (self.LARGE_NUMBER_OF_TABLES if gConfig.larger_data else self.SMALL_NUMBER_OF_TABLES)))
            random.shuffle(tblSeq)
            tickOutput = False  # if we have spitted out a "d" character for "drop regular table"
            isSuccess = True
            for i in tblSeq:
-                regTableName = self.getRegTableName(
-                    i)  # "db.reg_table_{}".format(i)
+                regTableName = self.getRegTableName(i)  # "db.reg_table_{}".format(i)
                try:
-                    self.execWtSql(wt, "drop table {}".format(
-                        regTableName))  # nRows always 0, like MySQL
+                    self.execWtSql(wt, "drop table {}.{}".
+                        format(self._db.getName(), regTableName))  # nRows always 0, like MySQL
                except taos.error.ProgrammingError as err:
                    # correcting for strange error number scheme                    
                    errno2 = Helper.convertErrno(err.errno)
                    if (errno2 in [0x362]):  # mnode invalid table name
                        isSuccess = False
-                        logger.debug(
-                            "[DB] Acceptable error when dropping a table")
+                        logger.debug("[DB] Acceptable error when dropping a table")
                    continue  # try to delete next regular table

                if (not tickOutput):
@@ -1981,8 +2130,8 @@ class TaskDropSuperTable(StateTransitionTask):
                        print("f", end="", flush=True)

        # Drop the super table itself
-        tblName = self._dbManager.getFixedSuperTableName()
-        self.execWtSql(wt, "drop table db.{}".format(tblName))
+        tblName = self._db.getFixedSuperTableName()
+        self.execWtSql(wt, "drop table {}.{}".format(self._db.getName(), tblName))


 class TaskAlterTags(StateTransitionTask):
@@ -1997,19 +2146,20 @@ class TaskAlterTags(StateTransitionTask):
    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
        # tblName = self._dbManager.getFixedSuperTableName()
        dbc = wt.getDbConn()
-        sTable = self._dbManager.getFixedSuperTable()
+        sTable = self._db.getFixedSuperTable()
+        dbName = self._db.getName()
        dice = Dice.throw(4)
        if dice == 0:
-            sTable.addTag(dbc, "extraTag", "int")
+            sTable.addTag(dbc, dbName, "extraTag", "int")
            # sql = "alter table db.{} add tag extraTag int".format(tblName)
        elif dice == 1:
-            sTable.dropTag(dbc, "extraTag")
+            sTable.dropTag(dbc, dbName, "extraTag")
            # sql = "alter table db.{} drop tag extraTag".format(tblName)
        elif dice == 2:
-            sTable.dropTag(dbc, "newTag")
+            sTable.dropTag(dbc, dbName, "newTag")
            # sql = "alter table db.{} drop tag newTag".format(tblName)
        else:  # dice == 3
-            sTable.changeTag(dbc, "extraTag", "newTag")
+            sTable.changeTag(dbc, dbName, "extraTag", "newTag")
            # sql = "alter table db.{} change tag extraTag newTag".format(tblName)

 class TaskRestartService(StateTransitionTask):
@@ -2074,7 +2224,9 @@ class TaskAddData(StateTransitionTask):
        return state.canAddData()

    def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
-        ds = self._dbManager # Quite DANGEROUS here, may result in multi-thread client access
+        # ds = self._dbManager # Quite DANGEROUS here, may result in multi-thread client access
+        db = self._db
+        dbc = wt.getDbConn()
        tblSeq = list(range(
                self.LARGE_NUMBER_OF_TABLES if gConfig.larger_data else self.SMALL_NUMBER_OF_TABLES))
        random.shuffle(tblSeq)
@@ -2084,23 +2236,25 @@ class TaskAddData(StateTransitionTask):
            else:
                self.activeTable.add(i)  # marking it active
            
-            sTable = ds.getFixedSuperTable()
+            sTable = db.getFixedSuperTable()
            regTableName = self.getRegTableName(i)  # "db.reg_table_{}".format(i)
-            sTable.ensureTable(wt.getDbConn(), regTableName)  # Ensure the table exists           
+            sTable.ensureTable(wt.getDbConn(), db.getName(), regTableName)  # Ensure the table exists           
           
            for j in range(self.LARGE_NUMBER_OF_RECORDS if gConfig.larger_data else self.SMALL_NUMBER_OF_RECORDS):  # number of records per table
-                nextInt = ds.getNextInt()
+                nextInt = db.getNextInt()
+                nextTick = db.getNextTick()
                if gConfig.record_ops:
                    self.prepToRecordOps()
                    self.fAddLogReady.write("Ready to write {} to {}\n".format(nextInt, regTableName))
                    self.fAddLogReady.flush()
                    os.fsync(self.fAddLogReady)
-                sql = "insert into {} values ('{}', {});".format( # removed: tags ('{}', {})
+                sql = "insert into {}.{} values ('{}', {});".format( # removed: tags ('{}', {})
+                    db.getName(),
                    regTableName,
                    # ds.getFixedSuperTableName(),
                    # ds.getNextBinary(), ds.getNextFloat(),
-                    ds.getNextTick(), nextInt)
-                self.execWtSql(wt, sql)
+                    nextTick, nextInt)
+                dbc.execute(sql)
                # Successfully wrote the data into the DB, let's record it
                # somehow
                te.recordDataMark(nextInt)
@@ -2110,6 +2264,27 @@ class TaskAddData(StateTransitionTask):
                            nextInt, regTableName))
                    self.fAddLogDone.flush()
                    os.fsync(self.fAddLogDone)
+
+                # Now read it back and verify, we might encounter an error if table is dropped
+                if gConfig.verify_data: # only if command line asks for it
+                    try:
+                        readBack = dbc.queryScalar("SELECT speed from {}.{} WHERE ts= '{}'".
+                            format(db.getName(), regTableName, nextTick))
+                        if readBack != nextInt :
+                            raise taos.error.ProgrammingError(
+                                "Failed to read back same data, wrote: {}, read: {}"
+                                .format(nextInt, readBack), 0x999)
+                    except taos.error.ProgrammingError as err:
+                        errno = Helper.convertErrno(err.errno)
+                        if errno in [0x991, 0x992]  : # not a single result
+                            raise taos.error.ProgrammingError(
+                                "Failed to read back same data for tick: {}, wrote: {}, read: {}"
+                                .format(nextTick, nextInt, "Empty Result" if errno==0x991 else "Multiple Result"),
+                                errno)
+                        # Re-throw no matter what
+                        raise
+                
+
            self.activeTable.discard(i)  # not raising an error, unlike remove


@@ -2178,7 +2353,7 @@ class SvcManager:
        self.inSigHandler = False
        # self._status = MainExec.STATUS_RUNNING # set inside
        # _startTaosService()
-        self.svcMgrThread = None
+        self.svcMgrThread = None # type: ServiceManagerThread
        self._lock = threading.Lock()
        self._isRestarting = False

@@ -2266,12 +2441,11 @@ class SvcManager:
                    proc.kill()
                # print("Process: {}".format(proc.name()))

+            
            self.svcMgrThread = ServiceManagerThread()  # create the object
            print("Attempting to start TAOS service started, printing out output...")
            self.svcMgrThread.start()            
-            self.svcMgrThread.procIpcBatch(
-                trimToTarget=10,
-                forceOutput=True)  # for printing 10 lines
+            self.svcMgrThread.procIpcBatch(trimToTarget=10, forceOutput=True)  # for printing 10 lines             
            print("TAOS service started")

    def stopTaosService(self, outputLines=20):
@@ -2320,7 +2494,7 @@ class ServiceManagerThread:
    MAX_QUEUE_SIZE = 10000

    def __init__(self):
-        self._tdeSubProcess = None
+        self._tdeSubProcess = None # type: TdeSubProcess
        self._thread = None
        self._status = None

@@ -2351,13 +2525,13 @@ class ServiceManagerThread:
        self._tdeSubProcess.start()

        self._ipcQueue = Queue()
-        self._thread = threading.Thread(
+        self._thread = threading.Thread( # First thread captures server OUTPUT
            target=self.svcOutputReader,
            args=(self._tdeSubProcess.getStdOut(), self._ipcQueue))
        self._thread.daemon = True  # thread dies with the program
        self._thread.start()

-        self._thread2 = threading.Thread(
+        self._thread2 = threading.Thread( # 2nd thread captures server ERRORs
            target=self.svcErrorReader,
            args=(self._tdeSubProcess.getStdErr(), self._ipcQueue))
        self._thread2.daemon = True  # thread dies with the program
@@ -2690,40 +2864,39 @@ class ClientManager:

        self.inSigHandler = False

-    def _printLastNumbers(self):  # to verify data durability
-        dbManager = DbManager(resetDb=False)
-        dbc = dbManager.getDbConn()
-        if dbc.query("show databases") <= 1:  # no database (we have a default called "log")
-            return
-        dbc.execute("use db")
-        if dbc.query("show tables") == 0:  # no tables
-            return
+    # TODO: need to revise how we verify data durability
+    # def _printLastNumbers(self):  # to verify data durability
+    #     dbManager = DbManager()
+    #     dbc = dbManager.getDbConn()
+    #     if dbc.query("show databases") <= 1:  # no database (we have a default called "log")
+    #         return
+    #     dbc.execute("use db")
+    #     if dbc.query("show tables") == 0:  # no tables
+    #         return

-        sTbName = dbManager.getFixedSuperTableName()
+    #     sTbName = dbManager.getFixedSuperTableName()

-        # get all regular tables
-        # TODO: analyze result set later
-        dbc.query("select TBNAME from db.{}".format(sTbName))
-        rTables = dbc.getQueryResult()
+    #     # get all regular tables
+    #     # TODO: analyze result set later
+    #     dbc.query("select TBNAME from db.{}".format(sTbName))
+    #     rTables = dbc.getQueryResult()

-        bList = TaskExecutor.BoundedList()
-        for rTbName in rTables:  # regular tables
-            dbc.query("select speed from db.{}".format(rTbName[0]))
-            numbers = dbc.getQueryResult()
-            for row in numbers:
-                # print("<{}>".format(n), end="", flush=True)
-                bList.add(row[0])
+    #     bList = TaskExecutor.BoundedList()
+    #     for rTbName in rTables:  # regular tables
+    #         dbc.query("select speed from db.{}".format(rTbName[0]))
+    #         numbers = dbc.getQueryResult()
+    #         for row in numbers:
+    #             # print("<{}>".format(n), end="", flush=True)
+    #             bList.add(row[0])

-        print("Top numbers in DB right now: {}".format(bList))
-        print("TDengine client execution is about to start in 2 seconds...")
-        time.sleep(2.0)
-        dbManager = None  # release?
-
-    def prepare(self):
-        self._printLastNumbers()
+    #     print("Top numbers in DB right now: {}".format(bList))
+    #     print("TDengine client execution is about to start in 2 seconds...")
+    #     time.sleep(2.0)
+    #     dbManager = None  # release?

    def run(self, svcMgr):    
-        self._printLastNumbers()
+        # self._printLastNumbers()
+        global gConfig

        dbManager = DbManager()  # Regular function
        thPool = ThreadPool(gConfig.num_threads, gConfig.max_steps)
@@ -2734,15 +2907,37 @@ class ClientManager:
        # print("TC failed = {}".format(self.tc.isFailed()))
        if svcMgr: # gConfig.auto_start_service:
            svcMgr.stopTaosService()
+            svcMgr = None
        # Print exec status, etc., AFTER showing messages from the server
        self.conclude()
        # print("TC failed (2) = {}".format(self.tc.isFailed()))
        # Linux return code: ref https://shapeshed.com/unix-exit-codes/
-        return 1 if self.tc.isFailed() else 0
+        ret = 1 if self.tc.isFailed() else 0
+        self.tc.cleanup()
+
+        # Release global variables
+        gConfig = None
+        gSvcMgr = None
+        logger = None
+
+        # Release variables here
+        self.tc = None
+        thPool = None
+        dbManager = None
+
+        gc.collect() # force garbage collection
+        # h = hpy()
+        # print("\n----- Final Python Heap -----\n")        
+        # print(h.heap())
+
+        return ret

    def conclude(self):
+        # self.tc.getDbManager().cleanUp() # clean up first, so we can show ZERO db connections
        self.tc.printStats()
-        self.tc.getDbManager().cleanUp()
+
+        
+        

 class MainExec:
    STATUS_STARTING = 1
@@ -2878,6 +3073,13 @@ def main():
        '--auto-start-service',
        action='store_true',
        help='Automatically start/stop the TDengine service (default: false)')
+    parser.add_argument(
+        '-b',
+        '--max-dbs',
+        action='store',
+        default=0,
+        type=int,
+        help='Maximum number of DBs to keep, set to disable dropping DB. (default: 0)')
    parser.add_argument(
        '-c',
        '--connector-type',
@@ -2895,6 +3097,13 @@ def main():
        '--run-tdengine',
        action='store_true',
        help='Run TDengine service in foreground (default: false)')
+    parser.add_argument(
+        '-i',
+        '--max-replicas',
+        action='store',
+        default=1,
+        type=int,
+        help='Maximum number of replicas to use, when testing against clusters. (default: 1)')
    parser.add_argument(
        '-l',
        '--larger-data',
@@ -2924,6 +3133,11 @@ def main():
        default=5,
        type=int,
        help='Number of threads to run (default: 10)')
+    parser.add_argument(
+        '-v',
+        '--verify-data',
+        action='store_true',
+        help='Verify data written in a number of places by reading back (default: false)')
    parser.add_argument(
        '-x',
        '--continue-on-exception',

--- a/tests/pytest/crash_gen/valgrind_taos.supp
+++ b/tests/pytest/crash_gen/valgrind_taos.supp
--- a/tests/script/sh/deploy.sh
+++ b/tests/script/sh/deploy.sh
@@ -111,24 +111,24 @@ echo "serverPort             ${NODE}"            >> $TAOS_CFG
 echo "dataDir                $DATA_DIR"          >> $TAOS_CFG
 echo "logDir                 $LOG_DIR"           >> $TAOS_CFG
 echo "debugFlag              0"                  >> $TAOS_CFG
-echo "mDebugFlag             135"                >> $TAOS_CFG
-echo "sdbDebugFlag           135"                >> $TAOS_CFG
-echo "dDebugFlag             135"                >> $TAOS_CFG
-echo "vDebugFlag             135"                >> $TAOS_CFG
-echo "tsdbDebugFlag          135"                >> $TAOS_CFG
-echo "cDebugFlag             135"                >> $TAOS_CFG
-echo "jnidebugFlag           135"                >> $TAOS_CFG
-echo "odbcdebugFlag          135"                >> $TAOS_CFG
-echo "httpDebugFlag          135"                >> $TAOS_CFG
-echo "monitorDebugFlag       135"                >> $TAOS_CFG
-echo "mqttDebugFlag          135"                >> $TAOS_CFG
-echo "qdebugFlag             135"                >> $TAOS_CFG
-echo "rpcDebugFlag           135"                >> $TAOS_CFG
+echo "mDebugFlag             143"                >> $TAOS_CFG
+echo "sdbDebugFlag           143"                >> $TAOS_CFG
+echo "dDebugFlag             143"                >> $TAOS_CFG
+echo "vDebugFlag             143"                >> $TAOS_CFG
+echo "tsdbDebugFlag          143"                >> $TAOS_CFG
+echo "cDebugFlag             143"                >> $TAOS_CFG
+echo "jnidebugFlag           143"                >> $TAOS_CFG
+echo "odbcdebugFlag          143"                >> $TAOS_CFG
+echo "httpDebugFlag          143"                >> $TAOS_CFG
+echo "monitorDebugFlag       143"                >> $TAOS_CFG
+echo "mqttDebugFlag          143"                >> $TAOS_CFG
+echo "qdebugFlag             143"                >> $TAOS_CFG
+echo "rpcDebugFlag           143"                >> $TAOS_CFG
 echo "tmrDebugFlag           131"                >> $TAOS_CFG
-echo "udebugFlag             135"                >> $TAOS_CFG
-echo "sdebugFlag             135"                >> $TAOS_CFG
-echo "wdebugFlag             135"                >> $TAOS_CFG
-echo "cqdebugFlag            135"                >> $TAOS_CFG
+echo "udebugFlag             143"                >> $TAOS_CFG
+echo "sdebugFlag             143"                >> $TAOS_CFG
+echo "wdebugFlag             143"                >> $TAOS_CFG
+echo "cqdebugFlag            143"                >> $TAOS_CFG
 echo "monitor                0"                  >> $TAOS_CFG
 echo "monitorInterval        1"                  >> $TAOS_CFG
 echo "http                   0"                  >> $TAOS_CFG

--- a/tests/script/test.sh
+++ b/tests/script/test.sh
@@ -109,15 +109,10 @@ echo "dataDir            $DATA_DIR"               >> $TAOS_CFG
 echo "logDir             $LOG_DIR"                >> $TAOS_CFG
 echo "scriptDir          ${CODE_DIR}/../script"   >> $TAOS_CFG
 echo "numOfLogLines      100000000"               >> $TAOS_CFG
-echo "dDebugFlag         135"                     >> $TAOS_CFG
-echo "mDebugFlag         135"                     >> $TAOS_CFG
-echo "sdbDebugFlag       135"                     >> $TAOS_CFG
-echo "rpcDebugFlag       135"                     >> $TAOS_CFG
+echo "rpcDebugFlag       143"                     >> $TAOS_CFG
 echo "tmrDebugFlag       131"                     >> $TAOS_CFG
-echo "cDebugFlag         135"                     >> $TAOS_CFG
-echo "httpDebugFlag      135"                     >> $TAOS_CFG
-echo "monitorDebugFlag   135"                     >> $TAOS_CFG
-echo "udebugFlag         135"                     >> $TAOS_CFG
+echo "cDebugFlag         143"                     >> $TAOS_CFG
+echo "udebugFlag         143"                     >> $TAOS_CFG
 echo "tablemetakeeptimer 5"                       >> $TAOS_CFG
 echo "wal                0"                       >> $TAOS_CFG
 echo "asyncLog           0"                       >> $TAOS_CFG

--- a/tests/script/unique/cluster/vgroup100.sim
+++ b/tests/script/unique/cluster/vgroup100.sim
@@ -27,7 +27,16 @@ system sh/exec.sh -n dnode2 -s start
 sql create dnode $hostname3
 system sh/exec.sh -n dnode3 -s start

-sleep 5000
+sleep 3000
+
+$x = 0
+show2: 
+	$x = $x + 1
+	sleep 2000
+	if $x == 10 then
+		return -1
+	endi
+	
 sql show mnodes
 $dnode1Role = $data2_1
 $dnode2Role = $data2_2
@@ -37,6 +46,16 @@ print $dnode1Role
 print $dnode2Role 
 print $dnode3Role 

+if $dnode1Role != master then
+	goto show2
+endi
+if $dnode2Role != slave then
+	goto show2
+endi
+if $dnode3Role != slave then
+	goto show2
+endi
+
 print ============================== step3
 $count = 2
 while $count < 102

--- a/tests/script/unique/mnode/mgmt21.sim
+++ b/tests/script/unique/mnode/mgmt21.sim
@@ -26,11 +26,11 @@ $x = 0
 show2: 
 	$x = $x + 1
 	sleep 2000
-	if $x == 10 then 
+	if $x == 5 then 
 		return -1
 	endi
 	
-sql show mnodes
+sql show mnodes -x show2
 print dnode1 ==> $data2_1
 print dnode2 ==> $data2_2
 if $data2_1 != master then