diff --git a/README-CN.md b/README-CN.md index 1d96a427092bd6f96ccff8d6666379c011ca1827..53abc5c006445f33053c6364227efb11e0f7823b 100644 --- a/README-CN.md +++ b/README-CN.md @@ -39,7 +39,7 @@ TDengine 是一款开源、高性能、云原生的时序数据库 (Time-Series # 构建 -TDengine 目前可以在 Linux、 Windows、macOS 等平台上安装和运行。任何 OS 的应用也可以选择 taosAdapter 的 RESTful 接口连接服务端 taosd。CPU 支持 X64/ARM64,后续会支持 MIPS64、Alpha64、ARM32、RISC-V 等 CPU 架构。 +TDengine 目前可以在 Linux、 Windows、macOS 等平台上安装和运行。任何 OS 的应用也可以选择 taosAdapter 的 RESTful 接口连接服务端 taosd。CPU 支持 X64/ARM64,后续会支持 MIPS64、Alpha64、ARM32、RISC-V 等 CPU 架构。目前不支持使用交叉编译器构建。 用户可根据需求选择通过源码、[容器](https://docs.taosdata.com/get-started/docker/)、[安装包](https://docs.taosdata.com/get-started/package/)或[Kubernetes](https://docs.taosdata.com/deployment/k8s/)来安装。本快速指南仅适用于通过源码安装。 @@ -352,4 +352,4 @@ TDengine 提供了丰富的应用程序开发接口,其中包括 C/C++、Java # 加入技术交流群 -TDengine 官方社群「物联网大数据群」对外开放,欢迎您加入讨论。搜索微信号 "tdengine1",加小 T 为好友,即可入群。 +TDengine 官方社群「物联网大数据群」对外开放,欢迎您加入讨论。搜索微信号 "tdengine",加小 T 为好友,即可入群。 diff --git a/README.md b/README.md index f065eb26853fbaae04e142f286bda76bd1b244f6..73df4fb187fedcfdd93dafec7e95f7ccfe0da465 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ For user manual, system design and architecture, please refer to [TDengine Docum # Building -At the moment, TDengine server supports running on Linux/Windows/macOS systems. Any application can also choose the RESTful interface provided by taosAdapter to connect the taosd service . TDengine supports X64/ARM64 CPU, and it will support MIPS64, Alpha64, ARM32, RISC-V and other CPU architectures in the future. +At the moment, TDengine server supports running on Linux/Windows/macOS systems. Any application can also choose the RESTful interface provided by taosAdapter to connect the taosd service . TDengine supports X64/ARM64 CPU, and it will support MIPS64, Alpha64, ARM32, RISC-V and other CPU architectures in the future. Right now we don't support build with cross-compiling environment. 
You can choose to install through source code, [container](https://docs.tdengine.com/get-started/docker/), [installation package](https://docs.tdengine.com/get-started/package/) or [Kubernetes](https://docs.tdengine.com/deployment/k8s/). This quick guide only applies to installing from source. diff --git a/cmake/cmake.version b/cmake/cmake.version index fe35fbe7bd7822b447828f34627c1a9421393a62..a6bf90fa3cb270245b206d81358cb76912e5b739 100644 --- a/cmake/cmake.version +++ b/cmake/cmake.version @@ -2,7 +2,7 @@ IF (DEFINED VERNUMBER) SET(TD_VER_NUMBER ${VERNUMBER}) ELSE () - SET(TD_VER_NUMBER "3.1.0.0.alpha") + SET(TD_VER_NUMBER "3.1.1.0.alpha") ENDIF () IF (DEFINED VERCOMPATIBLE) diff --git a/docs/en/05-get-started/03-package.md b/docs/en/05-get-started/03-package.md index 5a54c32a5137cdfdf25b6b6eca25a265c72c9242..3e3c04682faede39a46545e88f1ab682845a0730 100644 --- a/docs/en/05-get-started/03-package.md +++ b/docs/en/05-get-started/03-package.md @@ -18,7 +18,7 @@ The full package of TDengine includes the TDengine Server (`taosd`), TDengine Cl The standard server installation package includes `taos`, `taosd`, `taosAdapter`, `taosBenchmark`, and sample code. You can also download the Lite package that includes only `taosd` and the C/C++ connector. -The TDengine Community Edition is released as Deb and RPM packages. The Deb package can be installed on Debian, Ubuntu, and derivative systems. The RPM package can be installed on CentOS, RHEL, SUSE, and derivative systems. A .tar.gz package is also provided for enterprise customers, and you can install TDengine over `apt-get` as well. The .tar.tz package includes `taosdump` and the TDinsight installation script. If you want to use these utilities with the Deb or RPM package, download and install taosTools separately. TDengine can also be installed on x64 Windows and x64/m1 macOS. +TDengine OSS is released as Deb and RPM packages. The Deb package can be installed on Debian, Ubuntu, and derivative systems. 
The RPM package can be installed on CentOS, RHEL, SUSE, and derivative systems. A .tar.gz package is also provided for enterprise customers, and you can install TDengine over `apt-get` as well. The .tar.gz package includes `taosdump` and the TDinsight installation script. If you want to use these utilities with the Deb or RPM package, download and install taosTools separately. TDengine can also be installed on x64 Windows and x64/m1 macOS. ## Operating environment requirements In the Linux system, the minimum requirements for the operating environment are as follows: @@ -201,7 +201,7 @@ You can use the TDengine CLI to monitor your TDengine deployment and execute ad -After the installation is complete, please run `sc start taosd` or run `C:\TDengine\taosd.exe` with administrator privilege to start TDengine Server. +After the installation is complete, please run `sc start taosd` or run `C:\TDengine\taosd.exe` with administrator privilege to start TDengine Server. Please run `sc start taosadapter` or run `C:\TDengine\taosadapter.exe` with administrator privilege to start taosAdapter to provide HTTP/REST service. ## Command Line Interface (CLI) diff --git a/docs/en/05-get-started/index.md b/docs/en/05-get-started/index.md index 66573a89cd6e181192132cc0b304f415fa25b89c..cc3b4826dd26c9a2395d22ecf85ccf23da86d6d6 100644 --- a/docs/en/05-get-started/index.md +++ b/docs/en/05-get-started/index.md @@ -21,17 +21,6 @@ import {useCurrentSidebarCategory} from '@docusaurus/theme-common'; ``` -## Study TDengine Knowledge Map - -The TDengine Knowledge Map covers the various knowledge points of TDengine, revealing the invocation relationships and data flow between various conceptual entities. Learning and understanding the TDengine Knowledge Map will help you quickly master the TDengine knowledge system. - -
-
- -
Diagram 1. TDengine Knowledge Map
-
-
- ## Join TDengine Community diff --git a/docs/en/07-develop/07-tmq.mdx b/docs/en/07-develop/07-tmq.mdx index 506a8dcc4619d15a56f7228dd51596753dd3f12d..ccf39ef5818477d7a236e73358691dc859e80447 100644 --- a/docs/en/07-develop/07-tmq.mdx +++ b/docs/en/07-develop/07-tmq.mdx @@ -298,7 +298,7 @@ You configure the following parameters when creating a consumer: | `td.connect.port` | string | Port of the server side | | | `group.id` | string | Consumer group ID; consumers with the same ID are in the same group | **Required**. Maximum length: 192. Each topic can create up to 100 consumer groups. | | `client.id` | string | Client ID | Maximum length: 192. | -| `auto.offset.reset` | enum | Initial offset for the consumer group | Specify `earliest`, `latest`, or `none`(default) | +| `auto.offset.reset` | enum | Initial offset for the consumer group | `earliest`: subscribe from the earliest data, this is the default behavior; `latest`: subscribe from the latest data; or `none`: can't subscribe without committed offset| | `enable.auto.commit` | boolean | Commit automatically; true: user application doesn't need to explicitly commit; false: user application need to handle commit by itself | Default value is true | | `auto.commit.interval.ms` | integer | Interval for automatic commits, in milliseconds | | `msg.with.table.name` | boolean | Specify whether to deserialize table names from messages | default value: false diff --git a/docs/en/07-develop/09-udf.md b/docs/en/07-develop/09-udf.md index 5137e35c0a83ec972fb45b6aa37ee10d434bbfad..7526aba43bb1b47dffdc93648351df3186ddd384 100644 --- a/docs/en/07-develop/09-udf.md +++ b/docs/en/07-develop/09-udf.md @@ -403,7 +403,7 @@ In this section we will demonstrate 5 examples of developing UDF in Python langu In the guide, some debugging skills of using Python UDF will be explained too. -We assume you are using Linux system and already have TDengine 3.0.4.0+ and Python 3.x. 
+We assume you are using Linux system and already have TDengine 3.0.4.0+ and Python 3.7+. Note:**You can't use print() function to output log inside a UDF, you have to write the log to a specific file or use logging module of Python.** diff --git a/docs/en/10-deployment/03-k8s.md b/docs/en/10-deployment/03-k8s.md index 070ecbfeaa52038867c65b3920684352d61235cf..10c0341598cd051ff84950958b9c1f9589ff9319 100644 --- a/docs/en/10-deployment/03-k8s.md +++ b/docs/en/10-deployment/03-k8s.md @@ -4,23 +4,31 @@ sidebar_label: Kubernetes description: This document describes how to deploy TDengine on Kubernetes. --- -TDengine is a cloud-native time-series database that can be deployed on Kubernetes. This document gives a step-by-step description of how you can use YAML files to create a TDengine cluster and introduces common operations for TDengine in a Kubernetes environment. +## Overview + +As a time series database for Cloud Native architecture design, TDengine supports Kubernetes deployment. Firstly we introduce how to use YAML files to create a highly available TDengine cluster from scratch step by step for production usage, and highlight the common operations of TDengine in Kubernetes environment. + +To meet [high availability ](https://docs.taosdata.com/tdinternal/high-availability/)requirements, clusters need to meet the following requirements: + +- 3 or more dnodes: multiple vnodes in the same vgroup of TDengine are not allowed to be distributed in one dnode at the same time, so if you create a database with 3 replicas, the number of dnodes is greater than or equal to 3 +- 3 mnodes: mnode is responsible for the management of the entire TDengine cluster. The default number of mnode in TDengine cluster is only one. If the dnode where the mnode located is dropped, the entire cluster is unavailable. +- Database 3 replicas: The TDengine replica configuration is the database level, so 3 replicas for the database must need three dnodes in the cluster. 
If any one dnode goes offline, the normal usage of the whole cluster is not affected. **If 2 dnodes are offline, the cluster becomes unavailable because it can no longer complete a RAFT-based election.** (Enterprise version: in a disaster recovery scenario, if the data files of any node are damaged, the node can be restored by starting the dnode again.) ## Prerequisites Before deploying TDengine on Kubernetes, perform the following: -* Current steps are compatible with Kubernetes v1.5 and later version. -* Install and configure minikube, kubectl, and helm. -* Install and deploy Kubernetes and ensure that it can be accessed and used normally. Update any container registries or other services as necessary. +- This article applies to Kubernetes 1.19 and above +- This article uses the **kubectl** tool to install and deploy; please install it in advance +- Kubernetes has been installed and deployed and can access or update the necessary container repositories or other services You can download the configuration files in this document from [GitHub](https://github.com/taosdata/TDengine-Operator/tree/3.0/src/tdengine). ## Configure the service -Create a service configuration file named `taosd-service.yaml`. Record the value of `metadata.name` (in this example, `taos`) for use in the next step. Add the ports required by TDengine: +Create a service configuration file named `taosd-service.yaml`. Record the value of `metadata.name` (in this example, `taos`) for use in the next step. 
Then add the ports required by TDengine and record the value of the selector label "app" (in this example, `tdengine`) for use in the next step: -```yaml +```YAML --- apiVersion: v1 kind: Service @@ -31,10 +39,10 @@ metadata: spec: ports: - name: tcp6030 - - protocol: "TCP" + protocol: "TCP" port: 6030 - name: tcp6041 - - protocol: "TCP" + protocol: "TCP" port: 6041 selector: app: "tdengine" @@ -42,10 +50,11 @@ spec: ## Configure the service as StatefulSet -Configure the TDengine service as a StatefulSet. -Create the `tdengine.yaml` file and set `replicas` to 3. In this example, the region is set to Asia/Shanghai and 10 GB of standard storage are allocated per node. You can change the configuration based on your environment and business requirements. +Following the Kubernetes guidance on workload types, we use a StatefulSet as the deployment resource type for TDengine. Create the file `tdengine.yaml`, in which `replicas` sets the number of cluster nodes to 3. The node time zone is China (Asia/Shanghai), and each node is allocated 5 GiB of standard storage (refer to [Storage Classes](https://kubernetes.io/docs/concepts/storage/storage-classes/) to configure the storage class). You can modify these settings to suit your environment. + +Please pay special attention to the startupProbe configuration. If a dnode's Pod goes offline for a period of time and then restarts, the newly launched dnode Pod will be temporarily unavailable. If the startupProbe threshold is too small, Kubernetes will consider the Pod to be in an abnormal state and try to restart it, so the dnode's Pod will restart repeatedly and never return to normal status. 
Refer to [Configure Liveness, Readiness and Startup Probes](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/). -```yaml +```YAML --- apiVersion: apps/v1 kind: StatefulSet @@ -69,14 +78,14 @@ spec: spec: containers: - name: "tdengine" - image: "tdengine/tdengine:3.0.0.0" + image: "tdengine/tdengine:3.0.7.1" imagePullPolicy: "IfNotPresent" ports: - name: tcp6030 - - protocol: "TCP" + protocol: "TCP" containerPort: 6030 - name: tcp6041 - - protocol: "TCP" + protocol: "TCP" containerPort: 6041 env: # POD_NAME for FQDN config @@ -102,12 +111,18 @@ spec: # Must set if you want a cluster. - name: TAOS_FIRST_EP value: "$(STS_NAME)-0.$(SERVICE_NAME).$(STS_NAMESPACE).svc.cluster.local:$(TAOS_SERVER_PORT)" - # TAOS_FQDN should always be set in k8s env. + # TAOS_FQDN should always be set in k8s env. - name: TAOS_FQDN value: "$(POD_NAME).$(SERVICE_NAME).$(STS_NAMESPACE).svc.cluster.local" volumeMounts: - name: taosdata mountPath: /var/lib/taos + startupProbe: + exec: + command: + - taos-check + failureThreshold: 360 + periodSeconds: 10 readinessProbe: exec: command: @@ -129,266 +144,401 @@ spec: storageClassName: "standard" resources: requests: - storage: "10Gi" + storage: "5Gi" ``` ## Use kubectl to deploy TDengine -Run the following commands: +First create the corresponding namespace (`tdengine-test` in this example), and then execute the following commands in sequence: -```bash -kubectl apply -f taosd-service.yaml -kubectl apply -f tdengine.yaml +```Bash +kubectl apply -f taosd-service.yaml -n tdengine-test +kubectl apply -f tdengine.yaml -n tdengine-test ``` -The preceding configuration generates a TDengine cluster with three nodes in which dnodes are automatically configured. 
You can run the `show dnodes` command to query the nodes in the cluster: +The above configuration will generate a three-node TDengine cluster, dnode is automatically configured, you can use the **show dnodes** command to view the nodes of the current cluster: -```bash -kubectl exec -i -t tdengine-0 -- taos -s "show dnodes" -kubectl exec -i -t tdengine-1 -- taos -s "show dnodes" -kubectl exec -i -t tdengine-2 -- taos -s "show dnodes" +```Bash +kubectl exec -it tdengine-0 -n tdengine-test -- taos -s "show dnodes" +kubectl exec -it tdengine-1 -n tdengine-test -- taos -s "show dnodes" +kubectl exec -it tdengine-2 -n tdengine-test -- taos -s "show dnodes" ``` The output is as follows: -``` +```Bash taos> show dnodes - id | endpoint | vnodes | support_vnodes | status | create_time | note | -============================================================================================================================================ - 1 | tdengine-0.taosd.default.sv... | 0 | 256 | ready | 2022-08-10 13:14:57.285 | | - 2 | tdengine-1.taosd.default.sv... | 0 | 256 | ready | 2022-08-10 13:15:11.302 | | - 3 | tdengine-2.taosd.default.sv... | 0 | 256 | ready | 2022-08-10 13:15:23.290 | | -Query OK, 3 rows in database (0.003655s) + id | endpoint | vnodes | support_vnodes | status | create_time | reboot_time | note | active_code | c_active_code | +============================================================================================================================================================================================================================================= + 1 | tdengine-0.ta... | 0 | 16 | ready | 2023-07-19 17:54:18.552 | 2023-07-19 17:54:18.469 | | | | + 2 | tdengine-1.ta... | 0 | 16 | ready | 2023-07-19 17:54:37.828 | 2023-07-19 17:54:38.698 | | | | + 3 | tdengine-2.ta... 
| 0 | 16 | ready | 2023-07-19 17:55:01.141 | 2023-07-19 17:55:02.039 | | | | +Query OK, 3 row(s) in set (0.001853s) +``` + +View the current mnode + +```Bash +kubectl exec -it tdengine-1 -n tdengine-test -- taos -s "show mnodes\G" +taos> show mnodes\G +*************************** 1.row *************************** + id: 1 + endpoint: tdengine-0.taosd.tdengine-test.svc.cluster.local:6030 + role: leader + status: ready +create_time: 2023-07-19 17:54:18.559 +reboot_time: 2023-07-19 17:54:19.520 +Query OK, 1 row(s) in set (0.001282s) +``` + +## Create mnode + +```Bash +kubectl exec -it tdengine-0 -n tdengine-test -- taos -s "create mnode on dnode 2" +kubectl exec -it tdengine-0 -n tdengine-test -- taos -s "create mnode on dnode 3" +``` + +View mnode + +```Bash +kubectl exec -it tdengine-1 -n tdengine-test -- taos -s "show mnodes\G" + +taos> show mnodes\G +*************************** 1.row *************************** + id: 1 + endpoint: tdengine-0.taosd.tdengine-test.svc.cluster.local:6030 + role: leader + status: ready +create_time: 2023-07-19 17:54:18.559 +reboot_time: 2023-07-20 09:19:36.060 +*************************** 2.row *************************** + id: 2 + endpoint: tdengine-1.taosd.tdengine-test.svc.cluster.local:6030 + role: follower + status: ready +create_time: 2023-07-20 09:22:05.600 +reboot_time: 2023-07-20 09:22:12.838 +*************************** 3.row *************************** + id: 3 + endpoint: tdengine-2.taosd.tdengine-test.svc.cluster.local:6030 + role: follower + status: ready +create_time: 2023-07-20 09:22:20.042 +reboot_time: 2023-07-20 09:22:23.271 +Query OK, 3 row(s) in set (0.003108s) ``` ## Enable port forwarding -The kubectl port forwarding feature allows applications to access the TDengine cluster running on Kubernetes. +Kubectl port forwarding enables applications to access TDengine clusters running in Kubernetes environments. 
-``` -kubectl port-forward tdengine-0 6041:6041 & +```bash +kubectl port-forward -n tdengine-test tdengine-0 6041:6041 & ``` -Use curl to verify that the TDengine REST API is working on port 6041: +Use **curl** to verify that the TDengine REST API is working on port 6041: -``` -$ curl -u root:taosdata -d "show databases" 127.0.0.1:6041/rest/sql -Handling connection for 6041 -{"code":0,"column_meta":[["name","VARCHAR",64],["create_time","TIMESTAMP",8],["vgroups","SMALLINT",2],["ntables","BIGINT",8],["replica","TINYINT",1],["strict","VARCHAR",4],["duration","VARCHAR",10],["keep","VARCHAR",32],["buffer","INT",4],["pagesize","INT",4],["pages","INT",4],["minrows","INT",4],["maxrows","INT",4],["comp","TINYINT",1],["precision","VARCHAR",2],["status","VARCHAR",10],["retention","VARCHAR",60],["single_stable","BOOL",1],["cachemodel","VARCHAR",11],["cachesize","INT",4],["wal_level","TINYINT",1],["wal_fsync_period","INT",4],["wal_retention_period","INT",4],["wal_retention_size","BIGINT",8]],"data":[["information_schema",null,null,16,null,null,null,null,null,null,null,null,null,null,null,"ready",null,null,null,null,null,null,null,null,null,null],["performance_schema",null,null,10,null,null,null,null,null,null,null,null,null,null,null,"ready",null,null,null,null,null,null,null,null,null,null]],"rows":2} +```bash +curl -u root:taosdata -d "show databases" 127.0.0.1:6041/rest/sql +{"code":0,"column_meta":[["name","VARCHAR",64]],"data":[["information_schema"],["performance_schema"],["test"],["test1"]],"rows":4} ``` -## Enable the dashboard for visualization +## Test cluster - The minikube dashboard command enables visualized cluster management. +### Data preparation -``` -$ minikube dashboard -* Verifying dashboard health ... -* Launching proxy ... -* Verifying proxy health ... -* Opening http://127.0.0.1:46617/api/v1/namespaces/kubernetes-dashboard/services/http:kubernetes-dashboard:/proxy/ in your default browser... 
-http://127.0.0.1:46617/api/v1/namespaces/kubernetes-dashboard/services/http:kubernetes-dashboard:/proxy/ -``` +#### taosBenchmark -In some public clouds, minikube cannot be remotely accessed if it is bound to 127.0.0.1. In this case, use the kubectl proxy command to map the port to 0.0.0.0. Then, you can access the dashboard by using a web browser to open the dashboard URL above on the public IP address and port of the virtual machine. +Create a 3 replicas database with taosBenchmark, write 100 million data at the same time, and view the data at the same time +```Bash +kubectl exec -it tdengine-0 -n tdengine-test -- taosBenchmark -I stmt -d test -n 10000 -t 10000 -a 3 + +# query data +kubectl exec -it tdengine-0 -n tdengine-test -- taos -s "select count(*) from test.meters;" + +taos> select count(*) from test.meters; + count(*) | +======================== + 100000000 | +Query OK, 1 row(s) in set (0.103537s) ``` -$ kubectl proxy --accept-hosts='^.*$' --address='0.0.0.0' -``` + +View vnode distribution by showing dnodes + +```Bash +kubectl exec -it tdengine-0 -n tdengine-test -- taos -s "show dnodes" + +taos> show dnodes + id | endpoint | vnodes | support_vnodes | status | create_time | reboot_time | note | active_code | c_active_code | +============================================================================================================================================================================================================================================= + 1 | tdengine-0.ta... | 8 | 16 | ready | 2023-07-19 17:54:18.552 | 2023-07-19 17:54:18.469 | | | | + 2 | tdengine-1.ta... | 8 | 16 | ready | 2023-07-19 17:54:37.828 | 2023-07-19 17:54:38.698 | | | | + 3 | tdengine-2.ta... 
| 8 | 16 | ready | 2023-07-19 17:55:01.141 | 2023-07-19 17:55:02.039 | | | | +Query OK, 3 row(s) in set (0.001357s) +``` + +View vnode distribution by showing vgroups + +```Bash +kubectl exec -it tdengine-0 -n tdengine-test -- taos -s "show test.vgroups" + +taos> show test.vgroups + vgroup_id | db_name | tables | v1_dnode | v1_status | v2_dnode | v2_status | v3_dnode | v3_status | v4_dnode | v4_status | cacheload | cacheelements | tsma | +============================================================================================================================================================================================== + 2 | test | 1267 | 1 | follower | 2 | follower | 3 | leader | NULL | NULL | 0 | 0 | 0 | + 3 | test | 1215 | 1 | follower | 2 | leader | 3 | follower | NULL | NULL | 0 | 0 | 0 | + 4 | test | 1215 | 1 | leader | 2 | follower | 3 | follower | NULL | NULL | 0 | 0 | 0 | + 5 | test | 1307 | 1 | follower | 2 | leader | 3 | follower | NULL | NULL | 0 | 0 | 0 | + 6 | test | 1245 | 1 | follower | 2 | follower | 3 | leader | NULL | NULL | 0 | 0 | 0 | + 7 | test | 1275 | 1 | follower | 2 | leader | 3 | follower | NULL | NULL | 0 | 0 | 0 | + 8 | test | 1231 | 1 | leader | 2 | follower | 3 | follower | NULL | NULL | 0 | 0 | 0 | + 9 | test | 1245 | 1 | follower | 2 | follower | 3 | leader | NULL | NULL | 0 | 0 | 0 | +Query OK, 8 row(s) in set (0.001488s) +``` + +#### Manually created + +Create a three-replica database test1, create a table, and write 2 rows of data + +```Bash +kubectl exec -it tdengine-0 -n tdengine-test -- \ + taos -s \ + "create database if not exists test1 replica 3; + use test1; + create table if not exists t1(ts timestamp, n int); + insert into t1 values(now, 1)(now+1s, 2);" +``` + +View vnode distribution by showing test1.vgroups + +```Bash +kubectl exec -it tdengine-0 -n tdengine-test -- taos -s "show test1.vgroups" + +taos> show test1.vgroups + vgroup_id | db_name | tables | v1_dnode | v1_status | v2_dnode | v2_status | v3_dnode | v3_status | 
v4_dnode | v4_status | cacheload | cacheelements | tsma | +============================================================================================================================================================================================== + 10 | test1 | 1 | 1 | follower | 2 | follower | 3 | leader | NULL | NULL | 0 | 0 | 0 | + 11 | test1 | 0 | 1 | follower | 2 | leader | 3 | follower | NULL | NULL | 0 | 0 | 0 | +Query OK, 2 row(s) in set (0.001489s) +``` + +### Test fault tolerance + +Take dnode1, the dnode where the mnode leader is located, offline: + +```Bash +kubectl get pod -l app=tdengine -n tdengine-test -o wide +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +tdengine-0 0/1 ErrImagePull 2 (2s ago) 20m 10.244.2.75 node86 +tdengine-1 1/1 Running 1 (6m48s ago) 20m 10.244.0.59 node84 +tdengine-2 1/1 Running 0 21m 10.244.1.223 node85 +``` + +At this time, the cluster re-elects the mnode leader, and the mnode on dnode2 becomes the leader. + +```Bash +kubectl exec -it tdengine-1 -n tdengine-test -- taos -s "show mnodes\G" +Welcome to the TDengine Command Line Interface, Client Version:3.0.7.1.202307190706 +Copyright (c) 2022 by TDengine, all rights reserved. 
+ +taos> show mnodes\G +*************************** 1.row *************************** + id: 1 + endpoint: tdengine-0.taosd.tdengine-test.svc.cluster.local:6030 + role: offline + status: offline +create_time: 2023-07-19 17:54:18.559 +reboot_time: 1970-01-01 08:00:00.000 +*************************** 2.row *************************** + id: 2 + endpoint: tdengine-1.taosd.tdengine-test.svc.cluster.local:6030 + role: leader + status: ready +create_time: 2023-07-20 09:22:05.600 +reboot_time: 2023-07-20 09:32:00.227 +*************************** 3.row *************************** + id: 3 + endpoint: tdengine-2.taosd.tdengine-test.svc.cluster.local:6030 + role: follower + status: ready +create_time: 2023-07-20 09:22:20.042 +reboot_time: 2023-07-20 09:32:00.026 +Query OK, 3 row(s) in set (0.001513s) +``` + +Cluster can read and write normally + +```Bash +# insert +kubectl exec -it tdengine-0 -n tdengine-test -- taos -s "insert into test1.t1 values(now, 1)(now+1s, 2);" + +taos> insert into test1.t1 values(now, 1)(now+1s, 2); +Insert OK, 2 row(s) affected (0.002098s) + +# select +kubectl exec -it tdengine-0 -n tdengine-test -- taos -s "select *from test1.t1" + +taos> select *from test1.t1 + ts | n | +======================================== + 2023-07-19 18:04:58.104 | 1 | + 2023-07-19 18:04:59.104 | 2 | + 2023-07-19 18:06:00.303 | 1 | + 2023-07-19 18:06:01.303 | 2 | +Query OK, 4 row(s) in set (0.001994s) +``` + +Similarly, if a non-leader mnode goes offline, reads and writes continue to work normally, so that case is not demonstrated here. ## Scaling Out Your Cluster -TDengine clusters can scale automatically: +TDengine cluster supports automatic expansion: -```bash +```Bash kubectl scale statefulsets tdengine --replicas=4 ``` -The preceding command increases the number of replicas to 4. After running this command, query the pod status: +The parameter `--replicas=4` in the above command line indicates that you want to expand the TDengine cluster to 4 nodes. 
After execution, first check the status of the Pod: -```bash -kubectl get pods -l app=tdengine +```Bash +kubectl get pod -l app=tdengine -n tdengine-test -o wide ``` The output is as follows: -``` -NAME READY STATUS RESTARTS AGE -tdengine-0 1/1 Running 0 161m -tdengine-1 1/1 Running 0 161m -tdengine-2 1/1 Running 0 32m -tdengine-3 1/1 Running 0 32m +```Plain +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +tdengine-0 1/1 Running 4 (6h26m ago) 6h53m 10.244.2.75 node86 +tdengine-1 1/1 Running 1 (6h39m ago) 6h53m 10.244.0.59 node84 +tdengine-2 1/1 Running 0 5h16m 10.244.1.224 node85 +tdengine-3 1/1 Running 0 3m24s 10.244.2.76 node86 ``` -The status of all pods is Running. Once the pod status changes to Ready, you can check the dnode status: +At this time, the state of the POD is still Running, and the dnode state in the TDengine cluster can only be seen after the Pod status is `ready `: -```bash -kubectl exec -i -t tdengine-3 -- taos -s "show dnodes" +```Bash +kubectl exec -it tdengine-3 -n tdengine-test -- taos -s "show dnodes" ``` -The following output shows that the TDengine cluster has been expanded to 4 replicas: +The dnode list of the expanded four-node TDengine cluster: -``` +```Plain taos> show dnodes - id | endpoint | vnodes | support_vnodes | status | create_time | note | -============================================================================================================================================ - 1 | tdengine-0.taosd.default.sv... | 0 | 256 | ready | 2022-08-10 13:14:57.285 | | - 2 | tdengine-1.taosd.default.sv... | 0 | 256 | ready | 2022-08-10 13:15:11.302 | | - 3 | tdengine-2.taosd.default.sv... | 0 | 256 | ready | 2022-08-10 13:15:23.290 | | - 4 | tdengine-3.taosd.default.sv... 
| 0 | 256 | ready | 2022-08-10 13:33:16.039 | | -Query OK, 4 rows in database (0.008377s) + id | endpoint | vnodes | support_vnodes | status | create_time | reboot_time | note | active_code | c_active_code | +============================================================================================================================================================================================================================================= + 1 | tdengine-0.ta... | 10 | 16 | ready | 2023-07-19 17:54:18.552 | 2023-07-20 09:39:04.297 | | | | + 2 | tdengine-1.ta... | 10 | 16 | ready | 2023-07-19 17:54:37.828 | 2023-07-20 09:28:24.240 | | | | + 3 | tdengine-2.ta... | 10 | 16 | ready | 2023-07-19 17:55:01.141 | 2023-07-20 10:48:43.445 | | | | + 4 | tdengine-3.ta... | 0 | 16 | ready | 2023-07-20 16:01:44.007 | 2023-07-20 16:01:44.889 | | | | +Query OK, 4 row(s) in set (0.003628s) ``` ## Scaling In Your Cluster -When you scale in a TDengine cluster, your data is migrated to different nodes. You must run the drop dnodes command in TDengine to remove dnodes before scaling in your Kubernetes environment. - -Note: In a Kubernetes StatefulSet service, the newest pods are always removed first. For this reason, when you scale in your TDengine cluster, ensure that you drop the newest dnodes. +Since the TDengine cluster will migrate data between nodes during volume expansion and contraction, using the **kubectl** command to reduce the volume requires first using the "drop dnodes" command ( **If there are 3 replicas of db in the cluster, the number of dnodes after reduction must also be greater than or equal to 3, otherwise the drop dnode operation will be aborted** ), the node deletion is completed before Kubernetes cluster reduction. 
-``` -$ kubectl exec -i -t tdengine-0 -- taos -s "drop dnode 4" -``` +Note: Since Kubernetes Pods in the Statefulset can only be removed in reverse order of creation, the TDengine drop dnode also needs to be removed in reverse order of creation, otherwise the Pod will be in an error state. -```bash -$ kubectl exec -it tdengine-0 -- taos -s "show dnodes" +```Bash +kubectl exec -it tdengine-0 -n tdengine-test -- taos -s "drop dnode 4" +kubectl exec -it tdengine-0 -n tdengine-test -- taos -s "show dnodes" taos> show dnodes - id | endpoint | vnodes | support_vnodes | status | create_time | note | -============================================================================================================================================ - 1 | tdengine-0.taosd.default.sv... | 0 | 256 | ready | 2022-08-10 13:14:57.285 | | - 2 | tdengine-1.taosd.default.sv... | 0 | 256 | ready | 2022-08-10 13:15:11.302 | | - 3 | tdengine-2.taosd.default.sv... | 0 | 256 | ready | 2022-08-10 13:15:23.290 | | -Query OK, 3 rows in database (0.004861s) + id | endpoint | vnodes | support_vnodes | status | create_time | reboot_time | note | active_code | c_active_code | +============================================================================================================================================================================================================================================= + 1 | tdengine-0.ta... | 10 | 16 | ready | 2023-07-19 17:54:18.552 | 2023-07-20 09:39:04.297 | | | | + 2 | tdengine-1.ta... | 10 | 16 | ready | 2023-07-19 17:54:37.828 | 2023-07-20 09:28:24.240 | | | | + 3 | tdengine-2.ta... | 10 | 16 | ready | 2023-07-19 17:55:01.141 | 2023-07-20 10:48:43.445 | | | | +Query OK, 3 row(s) in set (0.003324s) ``` -Verify that the dnode have been successfully removed by running the `kubectl exec -i -t tdengine-0 -- taos -s "show dnodes"` command. 
Then run the following command to remove the pod: +After confirming that the removal is successful (use `kubectl exec -i -t tdengine-0 -- taos -s "show dnodes"` to view and confirm the dnode list), use the kubectl command to remove the Pod: -``` -kubectl scale statefulsets tdengine --replicas=3 +```Plain +kubectl scale statefulsets tdengine --replicas=3 -n tdengine-test ``` -The newest pod in the deployment is removed. Run the `kubectl get pods -l app=tdengine` command to query the pod status: +The last Pod will be deleted. Use the command `kubectl get pods -l app=tdengine` to check the Pod status: -``` -$ kubectl get pods -l app=tdengine -NAME READY STATUS RESTARTS AGE -tdengine-0 1/1 Running 0 4m7s -tdengine-1 1/1 Running 0 3m55s -tdengine-2 1/1 Running 0 2m28s +```Plain +kubectl get pod -l app=tdengine -n tdengine-test -o wide +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +tdengine-0 1/1 Running 4 (6h55m ago) 7h22m 10.244.2.75 node86 +tdengine-1 1/1 Running 1 (7h9m ago) 7h23m 10.244.0.59 node84 +tdengine-2 1/1 Running 0 5h45m 10.244.1.224 node85 ``` -After the pod has been removed, manually delete the PersistentVolumeClaim (PVC). Otherwise, future scale-outs will attempt to use existing data. +After the Pod is deleted, the PVC must be deleted manually; otherwise the next scale-out will reuse the previous data, and the new node will be unable to join the cluster normally. -```bash -$ kubectl delete pvc taosdata-tdengine-3 +```Bash +kubectl delete pvc taosdata-tdengine-3 -n tdengine-test ``` -Your cluster has now been safely scaled in, and you can scale it out again as necessary. +The cluster state at this time is safe and can be scaled up again if needed. 
-```bash -$ kubectl scale statefulsets tdengine --replicas=4 +```Bash +kubectl scale statefulsets tdengine --replicas=4 -n tdengine-test statefulset.apps/tdengine scaled -it@k8s-2:~/TDengine-Operator/src/tdengine$ kubectl get pods -l app=tdengine -NAME READY STATUS RESTARTS AGE -tdengine-0 1/1 Running 0 35m -tdengine-1 1/1 Running 0 34m -tdengine-2 1/1 Running 0 12m -tdengine-3 0/1 ContainerCreating 0 4s -it@k8s-2:~/TDengine-Operator/src/tdengine$ kubectl get pods -l app=tdengine -NAME READY STATUS RESTARTS AGE -tdengine-0 1/1 Running 0 35m -tdengine-1 1/1 Running 0 34m -tdengine-2 1/1 Running 0 12m -tdengine-3 0/1 Running 0 7s -it@k8s-2:~/TDengine-Operator/src/tdengine$ kubectl exec -it tdengine-0 -- taos -s "show dnodes" + +kubectl get pod -l app=tdengine -n tdengine-test -o wide +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +tdengine-0 1/1 Running 4 (6h59m ago) 7h27m 10.244.2.75 node86 +tdengine-1 1/1 Running 1 (7h13m ago) 7h27m 10.244.0.59 node84 +tdengine-2 1/1 Running 0 5h49m 10.244.1.224 node85 +tdengine-3 1/1 Running 0 20s 10.244.2.77 node86 + +kubectl exec -it tdengine-0 -n tdengine-test -- taos -s "show dnodes" taos> show dnodes -id | endpoint | vnodes | support_vnodes | status | create_time | offline reason | -====================================================================================================================================== -1 | tdengine-0.taosd.default.sv... | 0 | 4 | ready | 2022-07-25 17:38:49.012 | | -2 | tdengine-1.taosd.default.sv... | 1 | 4 | ready | 2022-07-25 17:39:01.517 | | -5 | tdengine-2.taosd.default.sv... | 0 | 4 | ready | 2022-07-25 18:01:36.479 | | -6 | tdengine-3.taosd.default.sv... 
| 0 | 4 | ready | 2022-07-25 18:13:54.411 | | -Query OK, 4 row(s) in set (0.001348s) + id | endpoint | vnodes | support_vnodes | status | create_time | reboot_time | note | active_code | c_active_code | +============================================================================================================================================================================================================================================= + 1 | tdengine-0.ta... | 10 | 16 | ready | 2023-07-19 17:54:18.552 | 2023-07-20 09:39:04.297 | | | | + 2 | tdengine-1.ta... | 10 | 16 | ready | 2023-07-19 17:54:37.828 | 2023-07-20 09:28:24.240 | | | | + 3 | tdengine-2.ta... | 10 | 16 | ready | 2023-07-19 17:55:01.141 | 2023-07-20 10:48:43.445 | | | | + 5 | tdengine-3.ta... | 0 | 16 | ready | 2023-07-20 16:31:34.092 | 2023-07-20 16:38:17.419 | | | | +Query OK, 4 row(s) in set (0.003881s) ``` ## Remove a TDengine Cluster -To fully remove a TDengine cluster, you must delete its statefulset, svc, configmap, and pvc entries: +> **When deleting the PVC, you need to pay attention to the pv persistentVolumeReclaimPolicy policy. It is recommended to change to Delete, so that the PV will be automatically cleaned up when the PVC is deleted, and the underlying CSI storage resources will be cleaned up at the same time. If the policy of deleting the PVC to automatically clean up the PV is not configured, and then after deleting the pvc, when manually cleaning up the PV, the CSI storage resources corresponding to the PV may not be released.** -```bash -kubectl delete statefulset -l app=tdengine -kubectl delete svc -l app=tdengine -kubectl delete pvc -l app=tdengine -kubectl delete configmap taoscfg +Complete removal of TDengine cluster, need to clean up statefulset, svc, configmap, pvc respectively. 
+```Bash +kubectl delete statefulset -l app=tdengine -n tdengine-test +kubectl delete svc -l app=tdengine -n tdengine-test +kubectl delete pvc -l app=tdengine -n tdengine-test +kubectl delete configmap taoscfg -n tdengine-test ``` ## Troubleshooting ### Error 1 -If you remove a pod without first running `drop dnode`, some TDengine nodes will go offline. +No "drop dnode" is directly reduced. Since the TDengine has not deleted the node, the reduced pod causes some nodes in the TDengine cluster to be offline. -``` -$ kubectl exec -it tdengine-0 -- taos -s "show dnodes" +```Plain +kubectl exec -it tdengine-0 -n tdengine-test -- taos -s "show dnodes" taos> show dnodes -id | endpoint | vnodes | support_vnodes | status | create_time | offline reason | -====================================================================================================================================== -1 | tdengine-0.taosd.default.sv... | 0 | 4 | ready | 2022-07-25 17:38:49.012 | | -2 | tdengine-1.taosd.default.sv... | 1 | 4 | ready | 2022-07-25 17:39:01.517 | | -5 | tdengine-2.taosd.default.sv... | 0 | 4 | offline | 2022-07-25 18:01:36.479 | status msg timeout | -6 | tdengine-3.taosd.default.sv... | 0 | 4 | offline | 2022-07-25 18:13:54.411 | status msg timeout | -Query OK, 4 row(s) in set (0.001323s) -``` - -### Error 2 - -If the number of nodes after a scale-in is less than the value of the replica parameter, the cluster will go down: - -Create a database with replica set to 2 and add data. 
- -```bash -kubectl exec -i -t tdengine-0 -- \ - taos -s \ - "create database if not exists test replica 2; - use test; - create table if not exists t1(ts timestamp, n int); - insert into t1 values(now, 1)(now+1s, 2);" - - + id | endpoint | vnodes | support_vnodes | status | create_time | reboot_time | note | active_code | c_active_code | +============================================================================================================================================================================================================================================= + 1 | tdengine-0.ta... | 10 | 16 | ready | 2023-07-19 17:54:18.552 | 2023-07-20 09:39:04.297 | | | | + 2 | tdengine-1.ta... | 10 | 16 | ready | 2023-07-19 17:54:37.828 | 2023-07-20 09:28:24.240 | | | | + 3 | tdengine-2.ta... | 10 | 16 | ready | 2023-07-19 17:55:01.141 | 2023-07-20 10:48:43.445 | | | | + 5 | tdengine-3.ta... | 0 | 16 | offline | 2023-07-20 16:31:34.092 | 2023-07-20 16:38:17.419 | status msg timeout | | | +Query OK, 4 row(s) in set (0.003862s) ``` -Scale in to one node: +## Finally -```bash -kubectl scale statefulsets tdengine --replicas=1 +For the high availability and high reliability of TDengine in a Kubernetes environment, hardware damage and disaster recovery are divided into two levels: -``` +1. The disaster recovery capability of the underlying distributed Block Storage, the multi-copy of Block Storage, the current popular distributed Block Storage such as Ceph, has the multi-copy capability, extending the storage copy to different racks, cabinets, computer rooms, Data center (or directly use the Block Storage service provided by Public Cloud vendors) +2. TDengine disaster recovery, in TDengine Enterprise, itself has when a dnode permanently offline (TCE-metal disk damage, data sorting loss), re-pull a blank dnode to restore the original dnode work. 
-In the TDengine CLI, you can see that no database operations succeed: +Finally, welcome to [TDengine Cloud ](https://cloud.tdengine.com/)to experience the one-stop fully managed TDengine Cloud as a Service. -``` -taos> show dnodes; - id | end_point | vnodes | cores | status | role | create_time | offline reason | -====================================================================================================================================== - 1 | tdengine-0.taosd.default.sv... | 2 | 40 | ready | any | 2021-06-01 15:55:52.562 | | - 2 | tdengine-1.taosd.default.sv... | 1 | 40 | offline | any | 2021-06-01 15:56:07.212 | status msg timeout | -Query OK, 2 row(s) in set (0.000845s) - -taos> show dnodes; - id | end_point | vnodes | cores | status | role | create_time | offline reason | -====================================================================================================================================== - 1 | tdengine-0.taosd.default.sv... | 2 | 40 | ready | any | 2021-06-01 15:55:52.562 | | - 2 | tdengine-1.taosd.default.sv... | 1 | 40 | offline | any | 2021-06-01 15:56:07.212 | status msg timeout | -Query OK, 2 row(s) in set (0.000837s) - -taos> use test; -Database changed. - -taos> insert into t1 values(now, 3); - -DB error: Unable to resolve FQDN (0.013874s) - -``` +> TDengine Cloud is a minimalist fully managed time series data processing Cloud as a Service platform developed based on the open source time series database TDengine. In addition to high-performance time series database, it also has system functions such as caching, subscription and stream computing, and provides convenient and secure data sharing, as well as numerous enterprise-level functions. It allows enterprises in the fields of Internet of Things, Industrial Internet, Finance, IT operation and maintenance monitoring to significantly reduce labor costs and operating costs in the management of time series data. 
diff --git a/docs/en/12-taos-sql/01-data-type.md b/docs/en/12-taos-sql/01-data-type.md index 13007d5bb1beef28a7307b648754ee2bced41a21..b9d51bcfcdbd2044d6fc8a66399832a732fda6f0 100644 --- a/docs/en/12-taos-sql/01-data-type.md +++ b/docs/en/12-taos-sql/01-data-type.md @@ -42,10 +42,20 @@ In TDengine, the data types below can be used when specifying a column or tag. | 14 | NCHAR | User Defined | Multi-byte string that can include multi byte characters like Chinese characters. Each character of NCHAR type consumes 4 bytes storage. The string value should be quoted with single quotes. Literal single quote inside the string must be preceded with backslash, like `\'`. The length must be specified when defining a column or tag of NCHAR type, for example nchar(10) means it can store at most 10 characters of nchar type and will consume fixed storage of 40 bytes. An error will be reported if the string value exceeds the length defined. | | 15 | JSON | | JSON type can only be used on tags. A tag of json type is excluded with any other tags of any other type. | | 16 | VARCHAR | User-defined | Alias of BINARY | +| 17 | GEOMETRY | User-defined | Geometry | :::note +- Each row of a table cannot exceed 48 KB (64 KB since version 3.0.5.0). Note that each BINARY/NCHAR/GEOMETRY column takes up an additional 2 bytes of storage space. - Only ASCII visible characters are suggested to be used in a column or tag of BINARY type. Multi-byte characters must be stored in NCHAR type. - The length of BINARY can be up to 16,374(data column is 65,517 and tag column is 16,382 since version 3.0.5.0) bytes. The string value must be quoted with single quotes. You must specify a length in bytes for a BINARY value, for example binary(20) for up to twenty single-byte characters. If the data exceeds the specified length, an error will occur. 
The literal single quote inside the string must be preceded with back slash like `\'` +- The maximum length of the GEOMETRY data column is 65,517 bytes, and the maximum length of the tag column is 16,382 bytes. Supports POINT, LINESTRING, and POLYGON subtypes of 2D. The following table describes the length calculation method: + + | # | **Syntax** | **MinLen** | **MaxLen** | **Growth of each point** | + |---|--------------------------------------|------------|------------|--------------------------| + | 1 | POINT(1.0 1.0) | 21 | 21 | NA | + | 2 | LINESTRING(1.0 1.0, 2.0 2.0) | 9+2*16 | 9+4094*16 | +16 | + | 3 | POLYGON((1.0 1.0, 2.0 2.0, 1.0 1.0)) | 13+3*16 | 13+4094*16 | +16 | + - Numeric values in SQL statements will be determined as integer or float type according to whether there is decimal point or whether scientific notation is used, so attention must be paid to avoid overflow. For example, 9999999999999999999 will be considered as overflow because it exceeds the upper limit of long integer, but 9999999999999999999.0 will be considered as a legal float number. ::: diff --git a/docs/en/12-taos-sql/02-database.md b/docs/en/12-taos-sql/02-database.md index 865e9b8db0ad00301fc1b8e4f378c146ab53e1c3..e7f3aa8d1bd1a2b4c675af22813e1775c6105b7c 100644 --- a/docs/en/12-taos-sql/02-database.md +++ b/docs/en/12-taos-sql/02-database.md @@ -56,7 +56,7 @@ database_option: { - WAL_FSYNC_PERIOD: specifies the interval (in milliseconds) at which data is written from the WAL to disk. This parameter takes effect only when the WAL parameter is set to 2. The default value is 3000. Enter a value between 0 and 180000. The value 0 indicates that incoming data is immediately written to disk. - MAXROWS: specifies the maximum number of rows recorded in a block. The default value is 4096. - MINROWS: specifies the minimum number of rows recorded in a block. The default value is 100. -- KEEP: specifies the time for which data is retained. Enter a value between 1 and 365000. 
The default value is 3650. The value of the KEEP parameter must be greater than or equal to the value of the DURATION parameter. TDengine automatically deletes data that is older than the value of the KEEP parameter. You can use m (minutes), h (hours), and d (days) as the unit, for example KEEP 100h or KEEP 10d. If you do not include a unit, d is used by default. The Enterprise Edition supports [Tiered Storage](https://docs.tdengine.com/tdinternal/arch/#tiered-storage) function, thus multiple KEEP values (comma separated and up to 3 values supported, and meet keep 0 <= keep 1 <= keep 2, e.g. KEEP 100h,100d,3650d) are supported; the Community Edition does not support Tiered Storage function (although multiple keep values are configured, they do not take effect, only the maximum keep value is used as KEEP). +- KEEP: specifies the time for which data is retained. Enter a value between 1 and 365000. The default value is 3650. The value of the KEEP parameter must be greater than or equal to the value of the DURATION parameter. TDengine automatically deletes data that is older than the value of the KEEP parameter. You can use m (minutes), h (hours), and d (days) as the unit, for example KEEP 100h or KEEP 10d. If you do not include a unit, d is used by default. TDengine Enterprise supports [Tiered Storage](https://docs.tdengine.com/tdinternal/arch/#tiered-storage) function, thus multiple KEEP values (comma separated and up to 3 values supported, and meet keep 0 <= keep 1 <= keep 2, e.g. KEEP 100h,100d,3650d) are supported; TDengine OSS does not support Tiered Storage function (although multiple keep values are configured, they do not take effect, only the maximum keep value is used as KEEP). - PAGES: specifies the number of pages in the metadata storage engine cache on each vnode. Enter a value greater than or equal to 64. The default value is 256. 
The space occupied by metadata storage on each vnode is equal to the product of the values of the PAGESIZE and PAGES parameters. The space occupied by default is 1 MB. - PAGESIZE: specifies the size (in KB) of each page in the metadata storage engine cache on each vnode. The default value is 4. Enter a value between 1 and 16384. - PRECISION: specifies the precision at which a database records timestamps. Enter ms for milliseconds, us for microseconds, or ns for nanoseconds. The default value is ms. @@ -73,7 +73,7 @@ database_option: { - TABLE_PREFIX: The prefix in the table name that is ignored when distributing a table to a vgroup when it's a positive number, or only the prefix is used when distributing a table to a vgroup, the default value is 0; For example, if the table name v30001, then "0001" is used if TSDB_PREFIX is set to 2 but "v3" is used if TSDB_PREFIX is set to -2; It can help you to control the distribution of tables. - TABLE_SUFFIX: The suffix in the table name that is ignored when distributing a table to a vgroup when it's a positive number, or only the suffix is used when distributing a table to a vgroup, the default value is 0; For example, if the table name v30001, then "v300" is used if TSDB_SUFFIX is set to 2 but "01" is used if TSDB_SUFFIX is set to -2; It can help you to control the distribution of tables. - TSDB_PAGESIZE: The page size of the data storage engine in a vnode. The unit is KB. The default is 4 KB. The range is 1 to 16384, that is, 1 KB to 16 MB. -- WAL_RETENTION_PERIOD: specifies the maximum time of which WAL files are to be kept for consumption. This parameter is used for data subscription. Enter a time in seconds. The default value 0. A value of 0 indicates that WAL files are not required to keep for consumption. Alter it with a proper value at first to create topics. +- WAL_RETENTION_PERIOD: specifies the maximum time of which WAL files are to be kept for consumption. This parameter is used for data subscription. 
Enter a time in seconds. The default value is 3600, which means that WAL data from the most recent 3600 seconds is retained for data subscription. Adjust this parameter to a value appropriate for your data subscription workload. - WAL_RETENTION_SIZE: specifies the maximum total size of which WAL files are to be kept for consumption. This parameter is used for data subscription. Enter a size in KB. The default value is 0. A value of 0 indicates that the total size of WAL files to keep for consumption has no upper limit. ### Example Statement diff --git a/docs/en/12-taos-sql/03-table.md b/docs/en/12-taos-sql/03-table.md index 7f39fb58673d1b79d184884087f09327568d67ca..10c44848c95632777c98b936929f615c550accb2 100644 --- a/docs/en/12-taos-sql/03-table.md +++ b/docs/en/12-taos-sql/03-table.md @@ -9,27 +9,27 @@ You create standard tables and subtables with the `CREATE TABLE` statement. ```sql CREATE TABLE [IF NOT EXISTS] [db_name.]tb_name (create_definition [, create_definition] ...) [table_options] - + CREATE TABLE create_subtable_clause - + CREATE TABLE [IF NOT EXISTS] [db_name.]tb_name (create_definition [, create_definition] ...) [TAGS (create_definition [, create_definition] ...)] [table_options] - + create_subtable_clause: { create_subtable_clause [create_subtable_clause] ... | [IF NOT EXISTS] [db_name.]tb_name USING [db_name.]stb_name [(tag_name [, tag_name] ...)] TAGS (tag_value [, tag_value] ...) } - + create_definition: col_name column_definition - + column_definition: type_name [comment 'string_value'] - + table_options: table_option ... - + table_option: { COMMENT 'string_value' | WATERMARK duration[,duration] @@ -45,9 +45,9 @@ table_option: { 1. The first column of a table MUST be of type TIMESTAMP. It is automatically set as the primary key. 2. The maximum length of the table name is 192 bytes. -3. The maximum length of each row is 48k(64k since version 3.0.5.0) bytes, please note that the extra 2 bytes used by each BINARY/NCHAR column are also counted. +3. 
The maximum length of each row is 48k(64k since version 3.0.5.0) bytes, please note that the extra 2 bytes used by each BINARY/NCHAR/GEOMETRY column are also counted. 4. The name of the subtable can only consist of characters from the English alphabet, digits and underscore. Table names can't start with a digit. Table names are case insensitive. -5. The maximum length in bytes must be specified when using BINARY or NCHAR types. +5. The maximum length in bytes must be specified when using BINARY/NCHAR/GEOMETRY types. 6. Escape character "\`" can be used to avoid the conflict between table names and reserved keywords, above rules will be bypassed when using escape character on table names, but the upper limit for the name length is still valid. The table names specified using escape character are case sensitive. For example \`aBc\` and \`abc\` are different table names but `abc` and `aBc` are same table names because they are both converted to `abc` internally. Only ASCII visible characters can be used with escape character. @@ -58,7 +58,7 @@ table_option: { 3. MAX_DELAY: specifies the maximum latency for pushing computation results. The default value is 15 minutes or the value of the INTERVAL parameter, whichever is smaller. Enter a value between 0 and 15 minutes in milliseconds, seconds, or minutes. You can enter multiple values separated by commas (,). Note: Retain the default value if possible. Configuring a small MAX_DELAY may cause results to be frequently pushed, affecting storage and query performance. This parameter applies only to supertables and takes effect only when the RETENTIONS parameter has been specified for the database. 4. ROLLUP: specifies aggregate functions to roll up. Rolling up a function provides downsampled results based on multiple axes. This parameter applies only to supertables and takes effect only when the RETENTIONS parameter has been specified for the database. You can specify only one function to roll up. 
The rollup takes effect on all columns except TS. Enter one of the following values: avg, sum, min, max, last, or first. 5. SMA: specifies functions on which to enable small materialized aggregates (SMA). SMA is user-defined precomputation of aggregates based on data blocks. Enter one of the following values: max, min, or sum This parameter can be used with supertables and standard tables. -6. TTL: specifies the time to live (TTL) for the table. If TTL is specified when creatinga table, after the time period for which the table has been existing is over TTL, TDengine will automatically delete the table. Please be noted that the system may not delete the table at the exact moment that the TTL expires but guarantee there is such a system and finally the table will be deleted. The unit of TTL is in days. The default value is 0, i.e. never expire. +6. TTL: specifies the time to live (TTL) for the table. If TTL is specified when creating a table, TDengine automatically deletes the table once it has existed for longer than the TTL. Note that the system may not delete the table at the exact moment the TTL expires, but it guarantees that the table will eventually be deleted. The unit of TTL is days. The default value is 0, i.e. the table never expires. ## Create Subtables @@ -88,7 +88,7 @@ You can create multiple subtables in a single SQL statement provided that all su ```sql ALTER TABLE [db_name.]tb_name alter_table_clause - + alter_table_clause: { alter_table_options | ADD COLUMN col_name column_type @@ -96,10 +96,10 @@ alter_table_clause: { | MODIFY COLUMN col_name column_type | RENAME COLUMN old_col_name new_col_name } - + alter_table_options: alter_table_option ... 
- + alter_table_option: { TTL value | COMMENT 'string_value' @@ -142,15 +142,15 @@ ALTER TABLE tb_name RENAME COLUMN old_col_name new_col_name ```sql ALTER TABLE [db_name.]tb_name alter_table_clause - + alter_table_clause: { alter_table_options | SET TAG tag_name = new_tag_value } - + alter_table_options: alter_table_option ... - + alter_table_option: { TTL value | COMMENT 'string_value' diff --git a/docs/en/12-taos-sql/16-operators.md b/docs/en/12-taos-sql/16-operators.md index 32ad4e7075b6a510cb537016effb6064e6c51794..9328d1688a0a2e74cad78da01fbf26c508c20159 100644 --- a/docs/en/12-taos-sql/16-operators.md +++ b/docs/en/12-taos-sql/16-operators.md @@ -39,7 +39,7 @@ TDengine supports the `UNION` and `UNION ALL` operations. UNION ALL collects all | 3 | \>, < | All types except BLOB, MEDIUMBLOB, and JSON | Greater than and less than | | 4 | \>=, <= | All types except BLOB, MEDIUMBLOB, and JSON | Greater than or equal to and less than or equal to | | 5 | IS [NOT] NULL | All types | Indicates whether the value is null | -| 6 | [NOT] BETWEEN AND | All types except BLOB, MEDIUMBLOB, and JSON | Closed interval comparison | +| 6 | [NOT] BETWEEN AND | All types except BLOB, MEDIUMBLOB, JSON and GEOMETRY | Closed interval comparison | | 7 | IN | All types except BLOB, MEDIUMBLOB, and JSON; the primary key (timestamp) is also not supported | Equal to any value in the list | | 8 | LIKE | BINARY, NCHAR, and VARCHAR | Wildcard match | | 9 | MATCH, NMATCH | BINARY, NCHAR, and VARCHAR | Regular expression match | diff --git a/docs/en/12-taos-sql/22-meta.md b/docs/en/12-taos-sql/22-meta.md index 47439ddf204b2224f25236adb470ea90097e774b..37304633e76b9c3c62106baa06debe54d5b922a0 100644 --- a/docs/en/12-taos-sql/22-meta.md +++ b/docs/en/12-taos-sql/22-meta.md @@ -28,47 +28,47 @@ This document introduces the tables of INFORMATION_SCHEMA and their structure. Provides information about dnodes. Similar to SHOW DNODES. 
-| # | **Column** | **Data Type** | **Description** | -| --- | :------------: | ------------ | ------------------------- | -| 1 | vnodes | SMALLINT | Current number of vnodes on the dnode. It should be noted that `vnodes` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 2 | support_vnodes | SMALLINT | Maximum number of vnodes on the dnode | -| 3 | status | BINARY(10) | Current status | -| 4 | note | BINARY(256) | Reason for going offline or other information | -| 5 | id | SMALLINT | Dnode ID | -| 6 | endpoint | BINARY(134) | Dnode endpoint | -| 7 | create | TIMESTAMP | Creation time | +| # | **Column** | **Data Type** | **Description** | +| --- | :------------: | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- | +| 1 | vnodes | SMALLINT | Current number of vnodes on the dnode. It should be noted that `vnodes` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 2 | support_vnodes | SMALLINT | Maximum number of vnodes on the dnode | +| 3 | status | BINARY(10) | Current status | +| 4 | note | BINARY(256) | Reason for going offline or other information | +| 5 | id | SMALLINT | Dnode ID | +| 6 | endpoint | BINARY(134) | Dnode endpoint | +| 7 | create | TIMESTAMP | Creation time | ## INS_MNODES Provides information about mnodes. Similar to SHOW MNODES. 
-| # | **Column** | **Data Type** | **Description** | -| --- | :---------: | ------------ | ------------------ | -| 1 | id | SMALLINT | Mnode ID | -| 2 | endpoint | BINARY(134) | Mnode endpoint | -| 3 | role | BINARY(10) | Current role | -| 4 | role_time | TIMESTAMP | Time at which the current role was assumed | -| 5 | create_time | TIMESTAMP | Creation time | +| # | **Column** | **Data Type** | **Description** | +| --- | :---------: | ------------- | ------------------------------------------ | +| 1 | id | SMALLINT | Mnode ID | +| 2 | endpoint | BINARY(134) | Mnode endpoint | +| 3 | role | BINARY(10) | Current role | +| 4 | role_time | TIMESTAMP | Time at which the current role was assumed | +| 5 | create_time | TIMESTAMP | Creation time | ## INS_QNODES Provides information about qnodes. Similar to SHOW QNODES. -| # | **Column** | **Data Type** | **Description** | -| --- | :---------: | ------------ | ------------ | -| 1 | id | SMALLINT | Qnode ID | -| 2 | endpoint | BINARY(134) | Qnode endpoint | -| 3 | create_time | TIMESTAMP | Creation time | +| # | **Column** | **Data Type** | **Description** | +| --- | :---------: | ------------- | --------------- | +| 1 | id | SMALLINT | Qnode ID | +| 2 | endpoint | BINARY(134) | Qnode endpoint | +| 3 | create_time | TIMESTAMP | Creation time | ## INS_CLUSTER Provides information about the cluster. -| # | **Column** | **Data Type** | **Description** | -| --- | :---------: | ------------ | ---------- | -| 1 | id | BIGINT | Cluster ID | -| 2 | name | BINARY(134) | Cluster name | -| 3 | create_time | TIMESTAMP | Creation time | +| # | **Column** | **Data Type** | **Description** | +| --- | :---------: | ------------- | --------------- | +| 1 | id | BIGINT | Cluster ID | +| 2 | name | BINARY(134) | Cluster name | +| 3 | create_time | TIMESTAMP | Creation time | ## INS_DATABASES @@ -98,7 +98,7 @@ Provides information about user-created databases. Similar to SHOW DATABASES. 
| 21 | cachesize | INT | Memory per vnode used for caching the newest data. It should be noted that `cachesize` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 22 | wal_level | INT | WAL level. It should be noted that `wal_level` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 23 | wal_fsync_period | INT | Interval at which WAL is written to disk. It should be noted that `wal_fsync_period` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 24 | wal_retention_period | INT | WAL retention period. It should be noted that `wal_retention_period` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 24 | wal_retention_period | INT | WAL retention period, in seconds. It should be noted that `wal_retention_period` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 25 | wal_retention_size | INT | Maximum WAL size. It should be noted that `wal_retention_size` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 26 | stt_trigger | SMALLINT | The threshold for number of files to trigger file merging. It should be noted that `stt_trigger` is a TDengine keyword and needs to be escaped with ` when used as a column name. | | 27 | table_prefix | SMALLINT | The prefix length in the table name that is ignored when distributing table to vnode based on table name. It should be noted that `table_prefix` is a TDengine keyword and needs to be escaped with ` when used as a column name. | @@ -109,191 +109,201 @@ Provides information about user-created databases. Similar to SHOW DATABASES. Provides information about user-defined functions. -| # | **Column** | **Data Type** | **Description** | -| --- | :---------: | ------------ | -------------- | -| 1 | name | BINARY(64) | Function name | -| 2 | comment | BINARY(255) | Function description. 
It should be noted that `comment` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 3 | aggregate | INT | Whether the UDF is an aggregate function. It should be noted that `aggregate` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 4 | output_type | BINARY(31) | Output data type | -| 5 | create_time | TIMESTAMP | Creation time | -| 6 | code_len | INT | Length of the source code | -| 7 | bufsize | INT | Buffer size | -| 8 | func_language | BINARY(31) | UDF programming language | -| 9 | func_body | BINARY(16384) | UDF function body | -| 10 | func_version | INT | UDF function version. starting from 0. Increasing by 1 each time it is updated| +| # | **Column** | **Data Type** | **Description** | +| --- | :-----------: | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| 1 | name | BINARY(64) | Function name | +| 2 | comment | BINARY(255) | Function description. It should be noted that `comment` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 3 | aggregate | INT | Whether the UDF is an aggregate function. It should be noted that `aggregate` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 4 | output_type | BINARY(31) | Output data type | +| 5 | create_time | TIMESTAMP | Creation time | +| 6 | code_len | INT | Length of the source code | +| 7 | bufsize | INT | Buffer size | +| 8 | func_language | BINARY(31) | UDF programming language | +| 9 | func_body | BINARY(16384) | UDF function body | +| 10 | func_version | INT | UDF function version. starting from 0. Increasing by 1 each time it is updated | ## INS_INDEXES Provides information about user-created indices. Similar to SHOW INDEX. 
-| # | **Column** | **Data Type** | **Description** | -| --- | :--------------: | ------------ | ---------------------------------------------------------------------------------- | -| 1 | db_name | BINARY(32) | Database containing the table with the specified index | -| 2 | table_name | BINARY(192) | Table containing the specified index | -| 3 | index_name | BINARY(192) | Index name | -| 4 | db_name | BINARY(64) | Index column | -| 5 | index_type | BINARY(10) | SMA or FULLTEXT index | -| 6 | index_extensions | BINARY(256) | Other information For SMA indices, this shows a list of functions. For FULLTEXT indices, this is null. | +| # | **Column** | **Data Type** | **Description** | +| --- | :--------------: | ------------- | --------------------------------------------------------------------- | +| 1 | db_name | BINARY(32) | Database containing the table with the specified index | +| 2 | table_name | BINARY(192) | Table containing the specified index | +| 3 | index_name | BINARY(192) | Index name | +| 4 | db_name | BINARY(64) | Index column | +| 5 | index_type | BINARY(10) | SMA or tag index | +| 6 | index_extensions | BINARY(256) | Other information For SMA/tag indices, this shows a list of functions | ## INS_STABLES Provides information about supertables. -| # | **Column** | **Data Type** | **Description** | -| --- | :-----------: | ------------ | ------------------------ | -| 1 | stable_name | BINARY(192) | Supertable name | -| 2 | db_name | BINARY(64) | All databases in the supertable | -| 3 | create_time | TIMESTAMP | Creation time | -| 4 | columns | INT | Number of columns | -| 5 | tags | INT | Number of tags. It should be noted that `tags` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 6 | last_update | TIMESTAMP | Last updated time | -| 7 | table_comment | BINARY(1024) | Table description | -| 8 | watermark | BINARY(64) | Window closing time. 
It should be noted that `watermark` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 9 | max_delay | BINARY(64) | Maximum delay for pushing stream processing results. It should be noted that `max_delay` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 10 | rollup | BINARY(128) | Rollup aggregate function. It should be noted that `rollup` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| # | **Column** | **Data Type** | **Description** | +| --- | :-----------: | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| 1 | stable_name | BINARY(192) | Supertable name | +| 2 | db_name | BINARY(64) | All databases in the supertable | +| 3 | create_time | TIMESTAMP | Creation time | +| 4 | columns | INT | Number of columns | +| 5 | tags | INT | Number of tags. It should be noted that `tags` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 6 | last_update | TIMESTAMP | Last updated time | +| 7 | table_comment | BINARY(1024) | Table description | +| 8 | watermark | BINARY(64) | Window closing time. It should be noted that `watermark` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 9 | max_delay | BINARY(64) | Maximum delay for pushing stream processing results. It should be noted that `max_delay` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 10 | rollup | BINARY(128) | Rollup aggregate function. It should be noted that `rollup` is a TDengine keyword and needs to be escaped with ` when used as a column name. | ## INS_TABLES Provides information about standard tables and subtables. 
-| # | **Column** | **Data Type** | **Description** | -| --- | :-----------: | ------------ | ---------------- | -| 1 | table_name | BINARY(192) | Table name | -| 2 | db_name | BINARY(64) | Database name | -| 3 | create_time | TIMESTAMP | Creation time | -| 4 | columns | INT | Number of columns | -| 5 | stable_name | BINARY(192) | Supertable name | -| 6 | uid | BIGINT | Table ID | -| 7 | vgroup_id | INT | Vgroup ID | -| 8 | ttl | INT | Table time-to-live. It should be noted that `ttl` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 9 | table_comment | BINARY(1024) | Table description | -| 10 | type | BINARY(20) | Table type | +| # | **Column** | **Data Type** | **Description** | +| --- | :-----------: | ------------- | ---------------------------------------------------------------------------------------------------------------------------------- | +| 1 | table_name | BINARY(192) | Table name | +| 2 | db_name | BINARY(64) | Database name | +| 3 | create_time | TIMESTAMP | Creation time | +| 4 | columns | INT | Number of columns | +| 5 | stable_name | BINARY(192) | Supertable name | +| 6 | uid | BIGINT | Table ID | +| 7 | vgroup_id | INT | Vgroup ID | +| 8 | ttl | INT | Table time-to-live. It should be noted that `ttl` is a TDengine keyword and needs to be escaped with ` when used as a column name. 
| +| 9 | table_comment | BINARY(1024) | Table description | +| 10 | type | BINARY(20) | Table type | ## INS_TAGS -| # | **Column** | **Data Type** | **Description** | -| --- | :---------: | ------------- | ---------------------- | -| 1 | table_name | BINARY(192) | Table name | -| 2 | db_name | BINARY(64) | Database name | -| 3 | stable_name | BINARY(192) | Supertable name | -| 4 | tag_name | BINARY(64) | Tag name | -| 5 | tag_type | BINARY(64) | Tag type | -| 6 | tag_value | BINARY(16384) | Tag value | +| # | **Column** | **Data Type** | **Description** | +| --- | :---------: | ------------- | --------------- | +| 1 | table_name | BINARY(192) | Table name | +| 2 | db_name | BINARY(64) | Database name | +| 3 | stable_name | BINARY(192) | Supertable name | +| 4 | tag_name | BINARY(64) | Tag name | +| 5 | tag_type | BINARY(64) | Tag type | +| 6 | tag_value | BINARY(16384) | Tag value | ## INS_COLUMNS -| # | **Column** | **Data Type** | **Description** | -| --- | :---------: | ------------- | ---------------------- | -| 1 | table_name | BINARY(192) | Table name | -| 2 | db_name | BINARY(64) | Database name | -| 3 | table_type | BINARY(21) | Table type | -| 4 | col_name | BINARY(64) | Column name | -| 5 | col_type | BINARY(32) | Column type | -| 6 | col_length | INT | Column length | -| 7 | col_precision | INT | Column precision | -| 8 | col_scale | INT | Column scale | -| 9 | col_nullable | INT | Column nullable | +| # | **Column** | **Data Type** | **Description** | +| --- | :-----------: | ------------- | ---------------- | +| 1 | table_name | BINARY(192) | Table name | +| 2 | db_name | BINARY(64) | Database name | +| 3 | table_type | BINARY(21) | Table type | +| 4 | col_name | BINARY(64) | Column name | +| 5 | col_type | BINARY(32) | Column type | +| 6 | col_length | INT | Column length | +| 7 | col_precision | INT | Column precision | +| 8 | col_scale | INT | Column scale | +| 9 | col_nullable | INT | Column nullable | ## INS_USERS Provides information about 
TDengine users. -| # | **Column** | **Data Type** | **Description** | -| --- | :---------: | ------------ | -------- | -| 1 | user_name | BINARY(23) | User name | -| 2 | privilege | BINARY(256) | User permissions | -| 3 | create_time | TIMESTAMP | Creation time | +| # | **Column** | **Data Type** | **Description** | +| --- | :---------: | ------------- | ---------------- | +| 1 | user_name | BINARY(23) | User name | +| 2 | privilege | BINARY(256) | User permissions | +| 3 | create_time | TIMESTAMP | Creation time | ## INS_GRANTS Provides information about TDengine Enterprise Edition permissions. -| # | **Column** | **Data Type** | **Description** | -| --- | :---------: | ------------ | -------------------------------------------------- | -| 1 | version | BINARY(9) | Whether the deployment is a licensed or trial version | -| 2 | cpu_cores | BINARY(9) | CPU cores included in license | -| 3 | dnodes | BINARY(10) | Dnodes included in license. It should be noted that `dnodes` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 4 | streams | BINARY(10) | Streams included in license. It should be noted that `streams` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 5 | users | BINARY(10) | Users included in license. It should be noted that `users` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 6 | accounts | BINARY(10) | Accounts included in license. It should be noted that `accounts` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 7 | storage | BINARY(21) | Storage space included in license. It should be noted that `storage` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 8 | connections | BINARY(21) | Client connections included in license. It should be noted that `connections` is a TDengine keyword and needs to be escaped with ` when used as a column name. 
| -| 9 | databases | BINARY(11) | Databases included in license. It should be noted that `databases` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 10 | speed | BINARY(9) | Write speed specified in license (data points per second) | -| 11 | querytime | BINARY(9) | Total query time specified in license | -| 12 | timeseries | BINARY(21) | Number of metrics included in license | -| 13 | expired | BINARY(5) | Whether the license has expired | -| 14 | expire_time | BINARY(19) | When the trial period expires | +| # | **Column** | **Data Type** | **Description** | +| --- | :---------: | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| 1 | version | BINARY(9) | Whether the deployment is a licensed or trial version | +| 2 | cpu_cores | BINARY(9) | CPU cores included in license | +| 3 | dnodes | BINARY(10) | Dnodes included in license. It should be noted that `dnodes` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 4 | streams | BINARY(10) | Streams included in license. It should be noted that `streams` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 5 | users | BINARY(10) | Users included in license. It should be noted that `users` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 6 | accounts | BINARY(10) | Accounts included in license. It should be noted that `accounts` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 7 | storage | BINARY(21) | Storage space included in license. It should be noted that `storage` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 8 | connections | BINARY(21) | Client connections included in license. 
It should be noted that `connections` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 9 | databases | BINARY(11) | Databases included in license. It should be noted that `databases` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 10 | speed | BINARY(9) | Write speed specified in license (data points per second) | +| 11 | querytime | BINARY(9) | Total query time specified in license | +| 12 | timeseries | BINARY(21) | Number of metrics included in license | +| 13 | expired | BINARY(5) | Whether the license has expired | +| 14 | expire_time | BINARY(19) | When the trial period expires | ## INS_VGROUPS Provides information about vgroups. -| # | **Column** | **Data Type** | **Description** | -| --- | :-------: | ------------ | ------------------------------------------------------ | -| 1 | vgroup_id | INT | Vgroup ID | -| 2 | db_name | BINARY(32) | Database name | -| 3 | tables | INT | Tables in vgroup. It should be noted that `tables` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 4 | status | BINARY(10) | Vgroup status | -| 5 | v1_dnode | INT | Dnode ID of first vgroup member | -| 6 | v1_status | BINARY(10) | Status of first vgroup member | -| 7 | v2_dnode | INT | Dnode ID of second vgroup member | -| 8 | v2_status | BINARY(10) | Status of second vgroup member | -| 9 | v3_dnode | INT | Dnode ID of third vgroup member | -| 10 | v3_status | BINARY(10) | Status of third vgroup member | -| 11 | nfiles | INT | Number of data and metadata files in the vgroup | -| 12 | file_size | INT | Size of the data and metadata files in the vgroup | -| 13 | tsma | TINYINT | Whether time-range-wise SMA is enabled. 1 means enabled; 0 means disabled. 
| +| # | **Column** | **Data Type** | **Description** | +| --- | :--------: | ------------- | ----------------------------------------------------------------------------------------------------------------------------------- | +| 1 | vgroup_id | INT | Vgroup ID | +| 2 | db_name | BINARY(32) | Database name | +| 3 | tables | INT | Tables in vgroup. It should be noted that `tables` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 4 | status | BINARY(10) | Vgroup status | +| 5 | v1_dnode | INT | Dnode ID of first vgroup member | +| 6 | v1_status | BINARY(10) | Status of first vgroup member | +| 7 | v2_dnode | INT | Dnode ID of second vgroup member | +| 8 | v2_status | BINARY(10) | Status of second vgroup member | +| 9 | v3_dnode | INT | Dnode ID of third vgroup member | +| 10 | v3_status | BINARY(10) | Status of third vgroup member | +| 11 | nfiles | INT | Number of data and metadata files in the vgroup | +| 12 | file_size | INT | Size of the data and metadata files in the vgroup | +| 13 | tsma | TINYINT | Whether time-range-wise SMA is enabled. 1 means enabled; 0 means disabled. | ## INS_CONFIGS Provides system configuration information. -| # | **Column** | **Data Type** | **Description** | -| --- | :------: | ------------ | ------------ | -| 1 | name | BINARY(32) | Parameter | -| 2 | value | BINARY(64) | Value. It should be noted that `value` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| # | **Column** | **Data Type** | **Description** | +| --- | :--------: | ------------- | ----------------------------------------------------------------------------------------------------------------------- | +| 1 | name | BINARY(32) | Parameter | +| 2 | value | BINARY(64) | Value. It should be noted that `value` is a TDengine keyword and needs to be escaped with ` when used as a column name. | ## INS_DNODE_VARIABLES Provides dnode configuration information. 
-| # | **Column** | **Data Type** | **Description** | -| --- | :------: | ------------ | ------------ | -| 1 | dnode_id | INT | Dnode ID | -| 2 | name | BINARY(32) | Parameter | -| 3 | value | BINARY(64) | Value. It should be noted that `value` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| # | **Column** | **Data Type** | **Description** | +| --- | :--------: | ------------- | ----------------------------------------------------------------------------------------------------------------------- | +| 1 | dnode_id | INT | Dnode ID | +| 2 | name | BINARY(32) | Parameter | +| 3 | value | BINARY(64) | Value. It should be noted that `value` is a TDengine keyword and needs to be escaped with ` when used as a column name. | ## INS_TOPICS -| # | **Column** | **Data Type** | **Description** | -| --- | :---------: | ------------ | ------------------------------ | -| 1 | topic_name | BINARY(192) | Topic name | -| 2 | db_name | BINARY(64) | Database for the topic | -| 3 | create_time | TIMESTAMP | Creation time | -| 4 | sql | BINARY(1024) | SQL statement used to create the topic | +| # | **Column** | **Data Type** | **Description** | +| --- | :---------: | ------------- | -------------------------------------- | +| 1 | topic_name | BINARY(192) | Topic name | +| 2 | db_name | BINARY(64) | Database for the topic | +| 3 | create_time | TIMESTAMP | Creation time | +| 4 | sql | BINARY(1024) | SQL statement used to create the topic | ## INS_SUBSCRIPTIONS -| # | **Column** | **Data Type** | **Description** | -| --- | :------------: | ------------ | ------------------------ | -| 1 | topic_name | BINARY(204) | Subscribed topic | -| 2 | consumer_group | BINARY(193) | Subscribed consumer group | -| 3 | vgroup_id | INT | Vgroup ID for the consumer | -| 4 | consumer_id | BIGINT | Consumer ID | -| 5 | offset | BINARY(64) | Consumption progress | -| 6 | rows | BIGINT | Number of consumption items | +| # | **Column** | **Data Type** | **Description** | 
+| --- | :------------: | ------------- | --------------------------- | +| 1 | topic_name | BINARY(204) | Subscribed topic | +| 2 | consumer_group | BINARY(193) | Subscribed consumer group | +| 3 | vgroup_id | INT | Vgroup ID for the consumer | +| 4 | consumer_id | BIGINT | Consumer ID | +| 5 | offset | BINARY(64) | Consumption progress | +| 6 | rows | BIGINT | Number of consumption items | ## INS_STREAMS -| # | **Column** | **Data Type** | **Description** | -| --- | :----------: | ------------ | --------------------------------------- | -| 1 | stream_name | BINARY(64) | Stream name | -| 2 | create_time | TIMESTAMP | Creation time | -| 3 | sql | BINARY(1024) | SQL statement used to create the stream | -| 4 | status | BINARY(20) | Current status | -| 5 | source_db | BINARY(64) | Source database | -| 6 | target_db | BINARY(64) | Target database | -| 7 | target_table | BINARY(192) | Target table | -| 8 | watermark | BIGINT | Watermark (see stream processing documentation). It should be noted that `watermark` is a TDengine keyword and needs to be escaped with ` when used as a column name. | -| 9 | trigger | INT | Method of triggering the result push (see stream processing documentation). It should be noted that `trigger` is a TDengine keyword and needs to be escaped with ` when used as a column name. 
 | +| # | **Column** | **Data Type** | **Description** | +| --- | :----------: | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| 1 | stream_name | BINARY(64) | Stream name | +| 2 | create_time | TIMESTAMP | Creation time | +| 3 | sql | BINARY(1024) | SQL statement used to create the stream | +| 4 | status | BINARY(20) | Current status | +| 5 | source_db | BINARY(64) | Source database | +| 6 | target_db | BINARY(64) | Target database | +| 7 | target_table | BINARY(192) | Target table | +| 8 | watermark | BIGINT | Watermark (see stream processing documentation). It should be noted that `watermark` is a TDengine keyword and needs to be escaped with ` when used as a column name. | +| 9 | trigger | INT | Method of triggering the result push (see stream processing documentation). It should be noted that `trigger` is a TDengine keyword and needs to be escaped with ` when used as a column name. | + +## INS_USER_PRIVILEGES + +| # | **Column** | **Data Type** | **Description** | +| --- | :----------: | ------------ | -------------------------------------------| +| 1 | user_name | VARCHAR(24) | Username | +| 2 | privilege | VARCHAR(10) | Privilege description | +| 3 | db_name | VARCHAR(65) | Database name | +| 4 | table_name | VARCHAR(193) | Table name | +| 5 | condition | VARCHAR(49152) | The privilege filter for child tables | diff --git a/docs/en/12-taos-sql/27-index.md b/docs/en/12-taos-sql/27-index.md index 6d029bdd92e393cca7d4cfee1232b9a4e878b126..a89c8929c1b632e5a2deb8d493459d58f57b0cc1 100644 --- a/docs/en/12-taos-sql/27-index.md +++ b/docs/en/12-taos-sql/27-index.md @@ -4,12 +4,12 @@ sidebar_label: Indexing description: This document describes the SQL statements related to indexing in TDengine. --- -TDengine supports SMA and FULLTEXT indexing. +TDengine supports SMA and tag indexing. 
## Create an Index ```sql -CREATE FULLTEXT INDEX index_name ON tb_name (col_name [, col_name] ...) +CREATE INDEX index_name ON tb_name (col_name [, col_name] ...) CREATE SMA INDEX index_name ON tb_name index_option @@ -46,10 +46,6 @@ SELECT _wstart,_wend,_wduration,max(c2),min(c1) FROM st1 INTERVAL(5m,10s) SLIDIN ALTER LOCAL 'querySmaOptimize' '0'; ``` -### FULLTEXT Indexing - -Creates a text index for the specified column. FULLTEXT indexing improves performance for queries with text filtering. The index_option syntax is not supported for FULLTEXT indexing. FULLTEXT indexing is supported for JSON tag columns only. Multiple columns cannot be indexed together. However, separate indices can be created for each column. - ## Delete an Index ```sql diff --git a/docs/en/12-taos-sql/29-changes.md b/docs/en/12-taos-sql/29-changes.md index d668aa834514d162c86e868a8c652686a9502397..bbb52db4d9100c6643b6ec84460f273e9be3f2e6 100644 --- a/docs/en/12-taos-sql/29-changes.md +++ b/docs/en/12-taos-sql/29-changes.md @@ -18,6 +18,7 @@ description: This document describes how TDengine SQL has changed in version 3.0 | 8 | Mixed operations | Enhanced | Mixing scalar and vector operations in queries has been enhanced and is supported in all SELECT clauses. | 9 | Tag operations | Added | Tag columns can be used in queries and clauses like data columns. | 10 | Timeline clauses and time functions in supertables | Enhanced | When PARTITION BY is not used, data in supertables is merged into a single timeline. +| 11 | GEOMETRY | Added | Geometry ## SQL Syntax diff --git a/docs/en/13-operation/10-monitor.md b/docs/en/13-operation/10-monitor.md index 197dda20eecd03bd8f77a0f33b118963d3fb75c9..c1c6ac3c4ca1dd047a0501415095abb721017f73 100644 --- a/docs/en/13-operation/10-monitor.md +++ b/docs/en/13-operation/10-monitor.md @@ -214,19 +214,6 @@ The data of tdinsight dashboard is stored in `log` database (default. 
You can ch |dnode\_ep|NCHAR|TAG|dnode endpoint| |cluster\_id|NCHAR|TAG|cluster id| -### logs table - -`logs` table contains login information records. - -|field|type|is\_tag|comment| -|:----|:---|:-----|:------| -|ts|TIMESTAMP||timestamp| -|level|VARCHAR||log level| -|content|NCHAR||log content| -|dnode\_id|INT|TAG|dnode id| -|dnode\_ep|NCHAR|TAG|dnode endpoint| -|cluster\_id|NCHAR|TAG|cluster id| - ### log\_summary table `log_summary` table contains log summary information records. diff --git a/docs/en/14-reference/02-rest-api/02-rest-api.mdx b/docs/en/14-reference/02-rest-api/02-rest-api.mdx index ea010f42db8b988c597bcd4c9278c0b5d50a3ca7..4da987213cb8c5f21fd7c8defe1f0952b8f65e41 100644 --- a/docs/en/14-reference/02-rest-api/02-rest-api.mdx +++ b/docs/en/14-reference/02-rest-api/02-rest-api.mdx @@ -79,6 +79,12 @@ Parameter Description: - tz: Optional parameter that specifies the timezone of the returned time, following the IANA Time Zone rules, e.g. `America/New_York`. - req_id: Optional parameter that specifies the request id for tracing. +:::note + +URL Encoding. Make sure that parameters are properly encoded. For example, when specifying a timezone you must properly encode special characters. ?tz=Etc/GMT+10 will not work because the <+> plus symbol is recognized as a space in the url. It's best practice to encode all special characters in a parameter. Instead use ?tz=Etc%2FGMT%2B10 for the parameter. + +::: + For example, `http://h1.taos.com:6041/rest/sql/test` is a URL to `h1.taos.com:6041` and sets the default database name to `test`. TDengine supports both Basic authentication and custom authentication mechanisms, and subsequent versions will provide a standard secure digital signature mechanism for authentication. 
diff --git a/docs/en/14-reference/03-connector/06-rust.mdx b/docs/en/14-reference/03-connector/06-rust.mdx index 986b5cd104e0aef2dadefb60efd6f574576e7a4d..a98683d43c169c6a2f76dc154035c0af84464287 100644 --- a/docs/en/14-reference/03-connector/06-rust.mdx +++ b/docs/en/14-reference/03-connector/06-rust.mdx @@ -31,7 +31,8 @@ Websocket connections are supported on all platforms that can run Go. | connector-rust version | TDengine version | major features | | :----------------: | :--------------: | :--------------------------------------------------: | -| v0.8.12 | 3.0.5.0 or later | TMQ: Get consuming progress and seek offset to consume. | +| v0.9.2 | 3.0.7.0 or later | STMT: Get tag_fields and col_fields under ws. | +| v0.8.12 | 3.0.5.0 | TMQ: Get consuming progress and seek offset to consume. | | v0.8.0 | 3.0.4.0 | Support schemaless insert. | | v0.7.6 | 3.0.3.0 | Support req_id in query. | | v0.6.0 | 3.0.0.0 | Base features. | @@ -648,12 +649,12 @@ stmt.execute()?; //stmt.execute()?; ``` -For a working example, see [GitHub](https://github.com/taosdata/taos-connector-rust/blob/main/examples/bind.rs). +For a working example, see [GitHub](https://github.com/taosdata/taos-connector-rust/blob/main/taos/examples/bind.rs). For information about other structure APIs, see the [Rust documentation](https://docs.rs/taos). 
-[taos]: https://github.com/taosdata/rust-connector-taos +[taos]: https://github.com/taosdata/taos-connector-rust [r2d2]: https://crates.io/crates/r2d2 [TaosBuilder]: https://docs.rs/taos/latest/taos/struct.TaosBuilder.html [TaosCfg]: https://docs.rs/taos/latest/taos/struct.TaosCfg.html diff --git a/docs/en/14-reference/03-connector/07-python.mdx b/docs/en/14-reference/03-connector/07-python.mdx index f0a59842fecbe783fb2353f62e0ecb2bc59e2d6d..831e79eeb77e844899d7f3ca4319f5304af736eb 100644 --- a/docs/en/14-reference/03-connector/07-python.mdx +++ b/docs/en/14-reference/03-connector/07-python.mdx @@ -1007,13 +1007,12 @@ consumer.close() ### Other sample programs | Example program links | Example program content | -| ------------------------------------------------------------------------------------------------------------- | ------------------- ---- | -| [bind_multi.py](https://github.com/taosdata/taos-connector-python/blob/main/examples/bind-multi.py) | parameter binding, -bind multiple rows at once | -| [bind_row.py](https://github.com/taosdata/taos-connector-python/blob/main/examples/bind-row.py) | bind_row.py +|-----------------------|-------------------------| +| [bind_multi.py](https://github.com/taosdata/taos-connector-python/blob/main/examples/bind-multi.py) | parameter binding, bind multiple rows at once | +| [bind_row.py](https://github.com/taosdata/taos-connector-python/blob/main/examples/bind-row.py) | parameter binding, bind one row at once | | [insert_lines.py](https://github.com/taosdata/taos-connector-python/blob/main/examples/insert-lines.py) | InfluxDB line protocol writing | | [json_tag.py](https://github.com/taosdata/taos-connector-python/blob/main/examples/json-tag.py) | Use JSON type tags | -| [tmq.py](https://github.com/taosdata/taos-connector-python/blob/main/examples/tmq.py) | TMQ subscription | +| [tmq_consumer.py](https://github.com/taosdata/taos-connector-python/blob/main/examples/tmq_consumer.py) | TMQ subscription | ## Other notes diff 
--git a/docs/en/14-reference/03-connector/index.mdx b/docs/en/14-reference/03-connector/index.mdx index 41206931181f18063ad1701978a6abe26fc1f5f8..4a3e9195d6820d251ae484b6476b971a55956226 100644 --- a/docs/en/14-reference/03-connector/index.mdx +++ b/docs/en/14-reference/03-connector/index.mdx @@ -59,9 +59,9 @@ The different database framework specifications for various programming language | -------------------------------------- | ------------- | --------------- | ------------- | ------------- | ------------- | ------------- | | **Connection Management** | Support | Support | Support | Support | Support | Support | | **Regular Query** | Support | Support | Support | Support | Support | Support | -| **Parameter Binding** | Supported | Not Supported | Support | Support | Not Supported | Support | +| **Parameter Binding** | Supported | Supported | Support | Support | Not Supported | Support | | **Subscription (TMQ) ** | Supported | Support | Support | Not Supported | Not Supported | Support | -| **Schemaless** | Supported | Not Supported | Supported | Not Supported | Not Supported | Not Supported | +| **Schemaless** | Supported | Supported | Supported | Not Supported | Not Supported | Not Supported | | **Bulk Pulling (based on WebSocket) ** | Support | Support | Support | Support | Support | Support | :::warning diff --git a/docs/en/14-reference/05-taosbenchmark.md b/docs/en/14-reference/05-taosbenchmark.md index 2348810d9e20c85a22d4e4f29d949c8598fe024e..38a8048a21706415c4712f25aec366a4cd2afa3c 100644 --- a/docs/en/14-reference/05-taosbenchmark.md +++ b/docs/en/14-reference/05-taosbenchmark.md @@ -364,6 +364,7 @@ The configuration parameters for specifying super table tag columns and data col - **min**: The minimum value of the column/label of the data type. The generated value will equal or large than the minimum value. - **max**: The maximum value of the column/label of the data type. The generated value will less than the maximum value. 
+- **fun**: This column of data is filled with functions. Currently, only the sin and cos functions are supported. The input parameter is the timestamp and converted to an angle value. The conversion formula is: angle x=input time column ts value % 360. At the same time, it supports coefficient adjustment and random fluctuation factor adjustment, presented in a fixed format expression, such as fun="10\*sin(x)+100\*random(5)", where x represents the angle, ranging from 0 to 360 degrees, and the growth step size is consistent with the time column step size. 10 represents the coefficient of multiplication, 100 represents the coefficient of addition or subtraction, and 5 represents the fluctuation range within a random range of 5%. The currently supported data types are int, bigint, float, and double. Note: The expression is fixed and cannot be reversed. - **values**: The value field of the nchar/binary column/label, which will be chosen randomly from the values. diff --git a/docs/en/14-reference/09-support-platform/index.md b/docs/en/14-reference/09-support-platform/index.md index 7dfa8ac93a9e91eb9238f7b56033a592c241079f..21fe6fc1dc86935d1e27c8a3bdd73e4cc842d895 100644 --- a/docs/en/14-reference/09-support-platform/index.md +++ b/docs/en/14-reference/09-support-platform/index.md @@ -5,7 +5,7 @@ description: This document describes the supported platforms for the TDengine se ## List of supported platforms for TDengine server -| | **Windows Server 2016/2019** | **Windows 10/11** | **CentOS 7.9/8** | **Ubuntu 18/20** | **macOS** | +| | **Windows Server 2016/2019** | **Windows 10/11** | **CentOS 7.9/8** | **Ubuntu 18 or later** | **macOS** | | ------------ | ---------------------------- | ----------------- | ---------------- | ---------------- | --------- | | X64 | ● | ● | ● | ● | ● | | ARM64 | | | ● | | ● | diff --git a/docs/en/14-reference/12-config/index.md b/docs/en/14-reference/12-config/index.md index 
bf6a45735dc668c2af1deb24c63e7cbed5317530..c0512aef7b5822adf757446e9cb4c698c7b93075 100755 --- a/docs/en/14-reference/12-config/index.md +++ b/docs/en/14-reference/12-config/index.md @@ -166,7 +166,7 @@ Please note the `taoskeeper` needs to be installed and running to create the `lo | Attribute | Description | | ------------- | ---------------------------------------------------------------------------- | -| Applicable | Server Only | +| Applicable | Server and Client | | Meaning | Switch for allowing TDengine to collect and report service usage information | | Value Range | 0: Not allowed; 1: Allowed | | Default Value | 1 | @@ -174,7 +174,7 @@ Please note the `taoskeeper` needs to be installed and running to create the `lo | Attribute | Description | | ------------- | ---------------------------------------------------------------------------- | -| Applicable | Server Only | +| Applicable | Server and Client | | Meaning | Switch for allowing TDengine to collect and report crash related information | | Value Range | 0,1 0: Not allowed; 1: allowed | | Default Value | 1 | @@ -670,6 +670,15 @@ The charset that takes effect is UTF-8. | Value Range | 0: not consistent; 1: consistent. | | Default | 0 | +### smlTsDefaultName + +| Attribute | Description | +| -------- | -------------------------------------------------------- | +| Applicable | Client only | +| Meaning | The name of the time column for schemaless automatic table creation is set through this configuration | +| Type | String | +| Default Value | _ts | + ## Compress Parameters ### compressMsgSize @@ -732,6 +741,15 @@ The charset that takes effect is UTF-8. 
| Value Range | 0-23 | | Default Value | 0 | +### tmqMaxTopicNum + +| Attribute | Description | +| -------- | ------------------ | +| Applicable | Server Only | +| Meaning | The max num of topics | +| Value Range | 1-10000| +| Default Value | 20 | + ## 3.0 Parameters | # | **Parameter** | **Applicable to 2.x ** | **Applicable to 3.0 ** | Current behavior in 3.0 | diff --git a/docs/en/14-reference/13-schemaless/13-schemaless.md b/docs/en/14-reference/13-schemaless/13-schemaless.md index 3ae9098a73b3cc6f5f1e970886e33c40558a683b..54be18eea32d7e5c08e406ce2a7809f20b4bb0a4 100644 --- a/docs/en/14-reference/13-schemaless/13-schemaless.md +++ b/docs/en/14-reference/13-schemaless/13-schemaless.md @@ -34,7 +34,27 @@ In the schemaless writing data line protocol, each data item in the field_set ne - If there are English double quotes on both sides, it indicates the BINARY(32) type. For example, `"abc"`. - If there are double quotes on both sides and an L prefix, it means NCHAR(32) type. For example, `L"error message"`. -- Spaces, equal signs (=), commas (,), and double quotes (") need to be escaped with a backslash (\\) in front. (All refer to the ASCII character) +- Spaces, equals sign (=), comma (,), double quote ("), and backslash (\\) need to be escaped with a backslash (\\) in front. (All refer to the ASCII character). The rules are as follows: + +| **Serial number** | **Element** | **Escape characters** | +| -------- | ----------- | ----------------------------- | +| 1 | Measurement | Comma, Space | +| 2 | Tag key | Comma, Equals Sign, Space | +| 3 | Tag value | Comma, Equals Sign, Space | +| 4 | Field key | Comma, Equals Sign, Space | +| 5 | Field value | Double quote, Backslash | + +With two contiguous backslashes, the first is interpreted as an escape character. 
Examples of backslash escape rules are as follows: + +| **Serial number** | **Backslashes** | **Interpreted as** | +| -------- | ----------- | ----------------------------- | +| 1 | \ | \ | +| 2 | \\\\ | \ | +| 3 | \\\\\\ | \\\\ | +| 4 | \\\\\\\\ | \\\\ | +| 5 | \\\\\\\\\\ | \\\\\\ | +| 6 | \\\\\\\\\\\\ | \\\\\\ | + - Numeric types will be distinguished from data types by the suffix. | **Serial number** | **Postfix** | **Mapping type** | **Size (bytes)** | @@ -88,6 +108,8 @@ You can configure smlChildTableName in taos.cfg to specify table names, for exam 8. It is assumed that the order of field_set in a supertable is consistent, meaning that the first record contains all fields and subsequent records store fields in the same order. If the order is not consistent, set smlDataFormat in taos.cfg to false. Otherwise, data will be written out of order and a database error will occur. Note: TDengine 3.0.3.0 and later automatically detect whether order is consistent. This parameter is no longer used. +9. Because SQL table names do not support the period (.), schemaless handles periods specially: if a table name automatically created by schemaless contains a period (.), it is automatically replaced with an underscore (\_). If you manually specify a subtable name that contains a period (.), it is also converted to an underscore (\_). +10. The smlTsDefaultName parameter (a string value) in taos.cfg takes effect only on the client side. It sets the name of the timestamp column of tables automatically created by schemaless writing. If it is not configured, the name defaults to _ts. :::tip All processing logic of schemaless will still follow TDengine's underlying restrictions on data structures, such as the total length of each row of data cannot exceed 48 KB(64 KB since version 3.0.5.0) and the total length of a tag value cannot exceed 16 KB.
See [TDengine SQL Boundary Limits](/taos-sql/limit) for specific constraints in this area. diff --git a/docs/en/25-application/_03-immigrate.md b/docs/en/25-application/_03-immigrate.md index f78042353249a29f7ee634cfc544c6c0914e3251..457a40614e836e8735195b6b7e6c50268cf66662 100644 --- a/docs/en/25-application/_03-immigrate.md +++ b/docs/en/25-application/_03-immigrate.md @@ -338,7 +338,7 @@ Remark: Equivalent function: sum ```sql -Select max(value) from (select first(val) value from table_name interval(10s) fill(linear)) interval(10s) +Select sum(value) from (select first(val) value from table_name interval(10s) fill(linear)) interval(10s) ``` Note: This function has no interpolation requirements, so it can be directly calculated. diff --git a/docs/en/28-releases/01-tdengine.md b/docs/en/28-releases/01-tdengine.md index d05bf1139c8baf5ee35e798735374f67d084227e..6eaa395087879b50e6a1247523bc48c83ba9c200 100644 --- a/docs/en/28-releases/01-tdengine.md +++ b/docs/en/28-releases/01-tdengine.md @@ -6,10 +6,14 @@ description: This document provides download links for all released versions of TDengine 3.x installation packages can be downloaded at the following links: -For TDengine 2.x installation packages by version, please visit [here](https://www.taosdata.com/all-downloads). +For TDengine 2.x installation packages by version, please visit [here](https://tdengine.com/downloads/historical/). 
import Release from "/components/ReleaseV3"; +## 3.1.0.0 + + + ## 3.0.7.1 diff --git a/docs/zh/05-get-started/03-package.md b/docs/zh/05-get-started/03-package.md index f6d1c85a60ba5bbd08b122266ca42815a58d094c..621effa6fd52f17fd9ae36994dbfe2f0d4dfc52e 100644 --- a/docs/zh/05-get-started/03-package.md +++ b/docs/zh/05-get-started/03-package.md @@ -201,7 +201,7 @@ Active: inactive (dead) -安装后,可以在拥有管理员权限的 cmd 窗口执行 `sc start taosd` 或在 `C:\TDengine` 目录下,运行 `taosd.exe` 来启动 TDengine 服务进程。 +安装后,可以在拥有管理员权限的 cmd 窗口执行 `sc start taosd` 或在 `C:\TDengine` 目录下,运行 `taosd.exe` 来启动 TDengine 服务进程。如需使用 http/REST 服务,请执行 `sc start taosadapter` 或运行 `taosadapter.exe` 来启动 taosAdapter 服务进程。 **TDengine 命令行(CLI)** diff --git a/docs/zh/07-develop/09-udf.md b/docs/zh/07-develop/09-udf.md index ff464376873767f1d6bee28b254d1f58640abffb..bb6a575ccde7166ae84ece2d0da9f438578767ba 100644 --- a/docs/zh/07-develop/09-udf.md +++ b/docs/zh/07-develop/09-udf.md @@ -398,7 +398,7 @@ def finish(buf: bytes) -> output_type: 3. 定义一个标量函数,输入一个时间戳,输出距离这个时间最近的下一个周日。完成这个函数要用到第三方库 moment。我们在这个示例中讲解使用第三方库的注意事项。 4. 
定义一个聚合函数,计算某一列最大值和最小值的差, 也就是实现 TDengien 内置的 spread 函数。 同时也包含大量实用的 debug 技巧。 -本文假设你用的是 Linux 系统,且已安装好了 TDengine 3.0.4.0+ 和 Python 3.x。 +本文假设你用的是 Linux 系统,且已安装好了 TDengine 3.0.4.0+ 和 Python 3.7+。 注意:**UDF 内无法通过 print 函数输出日志,需要自己写文件或用 python 内置的 logging 库写文件**。 diff --git a/docs/zh/08-connector/26-rust.mdx b/docs/zh/08-connector/26-rust.mdx index 79a6badfead70c27fc344b1e506aa8ea5afb624d..3e51aa72bb85841e219f89c1b91b4ff4e4f791cc 100644 --- a/docs/zh/08-connector/26-rust.mdx +++ b/docs/zh/08-connector/26-rust.mdx @@ -30,7 +30,8 @@ Websocket 连接支持所有能运行 Rust 的平台。 | Rust 连接器版本 | TDengine 版本 | 主要功能 | | :----------------: | :--------------: | :--------------------------------------------------: | -| v0.8.12 | 3.0.5.0 or later | 消息订阅:获取消费进度及按照指定进度开始消费。 | +| v0.9.2 | 3.0.7.0 or later | STMT:ws 下获取 tag_fields、col_fields。 | +| v0.8.12 | 3.0.5.0 | 消息订阅:获取消费进度及按照指定进度开始消费。 | | v0.8.0 | 3.0.4.0 | 支持无模式写入。 | | v0.7.6 | 3.0.3.0 | 支持在请求中使用 req_id。 | | v0.6.0 | 3.0.0.0 | 基础功能。 | diff --git a/docs/zh/08-connector/index.md b/docs/zh/08-connector/index.md index 92bc8ed0ce81f27ebf3336669e7b60834581a559..6220a46b067b59ea2ce5c93ae03791de1aa0bbf0 100644 --- a/docs/zh/08-connector/index.md +++ b/docs/zh/08-connector/index.md @@ -58,9 +58,9 @@ TDengine 版本更新往往会增加新的功能特性,列表中的连接器 | ------------------------------ | -------- | ---------- | -------- | -------- | ----------- | -------- | | **连接管理** | 支持 | 支持 | 支持 | 支持 | 支持 | 支持 | | **普通查询** | 支持 | 支持 | 支持 | 支持 | 支持 | 支持 | -| **参数绑定** | 支持 | 暂不支持 | 支持 | 支持 | 暂不支持 | 支持 | +| **参数绑定** | 支持 | 支持 | 支持 | 支持 | 暂不支持 | 支持 | | **数据订阅(TMQ)** | 支持 | 支持 | 支持 | 暂不支持 | 暂不支持 | 支持 | -| **Schemaless** | 支持 | 暂不支持 | 支持 | 暂不支持 | 暂不支持 | 支持 | +| **Schemaless** | 支持 | 支持 | 支持 | 暂不支持 | 暂不支持 | 支持 | | **批量拉取(基于 WebSocket)** | 支持 | 支持 | 支持 | 支持 | 支持 | 支持 | :::warning diff --git a/docs/zh/10-deployment/03-k8s.md b/docs/zh/10-deployment/03-k8s.md index b4da31cda371a13d270311c7b9519cb45aeaa4a5..16e2be0dfde51266783087bd8f456cfa36c5ec1f 100644 --- a/docs/zh/10-deployment/03-k8s.md +++ 
b/docs/zh/10-deployment/03-k8s.md @@ -4,23 +4,31 @@ title: 在 Kubernetes 上部署 TDengine 集群 description: 利用 Kubernetes 部署 TDengine 集群的详细指南 --- -作为面向云原生架构设计的时序数据库,TDengine 支持 Kubernetes 部署。这里介绍如何使用 YAML 文件一步一步从头创建一个 TDengine 集群,并重点介绍 Kubernetes 环境下 TDengine 的常用操作。 +## 概述 + +作为面向云原生架构设计的时序数据库,TDengine 本身就支持 Kubernetes 部署。这里介绍如何使用 YAML 文件从头一步一步创建一个可用于生产使用的高可用 TDengine 集群,并重点介绍 Kubernetes 环境下 TDengine 的常用操作。 + +为了满足[高可用](https://docs.taosdata.com/tdinternal/high-availability/)的需求,集群需要满足如下要求: + +- 3个及以上 dnode :TDengine 的同一个 vgroup 中的多个 vnode ,不允许同时分布在一个 dnode ,所以如果创建3副本的数据库,则 dnode 数大于等于3 +- 3个 mnode :mnode 负责整个集群的管理工作,TDengine 默认是一个 mnode。如果这个 mnode 所在的 dnode 掉线,则整个集群不可用。 +- 数据库的3副本:TDengine 的副本配置是数据库级别,所以数据库3副本可满足在3个 dnode 的集群中,任意一个 dnode 下线,都不影响集群的正常使用。**如果下线 dnode 个数为 2,此时集群不可用,因为 RAFT 无法完成选举。**(企业版:在灾难恢复场景,任一节点数据文件损坏,都可以通过重新拉起dnode进行恢复) ## 前置条件 要使用 Kubernetes 部署管理 TDengine 集群,需要做好如下准备工作。 -* 本文适用 Kubernetes v1.5 以上版本 -* 本文和下一章使用 minikube、kubectl 和 helm 等工具进行安装部署,请提前安装好相应软件 -* Kubernetes 已经安装部署并能正常访问使用或更新必要的容器仓库或其他服务 +- 本文适用 Kubernetes v1.19 以上版本 +- 本文使用 kubectl 工具进行安装部署,请提前安装好相应软件 +- Kubernetes 已经安装部署并能正常访问使用或更新必要的容器仓库或其他服务 以下配置文件也可以从 [GitHub 仓库](https://github.com/taosdata/TDengine-Operator/tree/3.0/src/tdengine) 下载。 ## 配置 Service 服务 -创建一个 Service 配置文件:`taosd-service.yaml`,服务名称 `metadata.name` (此处为 "taosd") 将在下一步中使用到。添加 TDengine 所用到的端口: +创建一个 Service 配置文件:`taosd-service.yaml`,服务名称 `metadata.name` (此处为 "taosd") 将在下一步中使用到。首先添加 TDengine 所用到的端口,然后在选择器设置确定的标签 app (此处为 “tdengine”)。 -```yaml +```YAML --- apiVersion: v1 kind: Service @@ -42,10 +50,11 @@ spec: ## 有状态服务 StatefulSet -根据 Kubernetes 对各类部署的说明,我们将使用 StatefulSet 作为 TDengine 的服务类型。 -创建文件 `tdengine.yaml`,其中 replicas 定义集群节点的数量为 3。节点时区为中国(Asia/Shanghai),每个节点分配 10G 标准(standard)存储。你也可以根据实际情况进行相应修改。 +根据 Kubernetes 对各类部署的说明,我们将使用 StatefulSet 作为 TDengine 的部署资源类型。 创建文件 `tdengine.yaml`,其中 replicas 定义集群节点的数量为 3。节点时区为中国(Asia/Shanghai),每个节点分配 5G 标准(standard)存储(参考[Storage
Classes](https://kubernetes.io/docs/concepts/storage/storage-classes/) 配置 storage class )。你也可以根据实际情况进行相应修改。 + +请特别注意startupProbe的配置,在 dnode 的 Pod 掉线一段时间后,再重新启动,这个时候新上线的 dnode 会短暂不可用。如果startupProbe配置过小,Kubernetes 会认为该 Pod 处于不正常的状态,并尝试重启该 Pod,该 dnode 的 Pod 会频繁重启,始终无法恢复到正常状态。参考 [Configure Liveness, Readiness and Startup Probes](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/) -```yaml +```YAML --- apiVersion: apps/v1 kind: StatefulSet @@ -69,7 +78,7 @@ spec: spec: containers: - name: "tdengine" - image: "tdengine/tdengine:3.0.0.0" + image: "tdengine/tdengine:3.0.7.1" imagePullPolicy: "IfNotPresent" ports: - name: tcp6030 @@ -108,6 +117,12 @@ spec: volumeMounts: - name: taosdata mountPath: /var/lib/taos + startupProbe: + exec: + command: + - taos-check + failureThreshold: 360 + periodSeconds: 10 readinessProbe: exec: command: @@ -129,199 +144,373 @@ spec: storageClassName: "standard" resources: requests: - storage: "10Gi" + storage: "5Gi" ``` ## 使用 kubectl 命令部署 TDengine 集群 -顺序执行以下命令。 +首先创建对应的 namespace,然后顺序执行以下命令: -```bash -kubectl apply -f taosd-service.yaml -kubectl apply -f tdengine.yaml +```Bash +kubectl apply -f taosd-service.yaml -n tdengine-test +kubectl apply -f tdengine.yaml -n tdengine-test ``` 上面的配置将生成一个三节点的 TDengine 集群,dnode 为自动配置,可以使用 show dnodes 命令查看当前集群的节点: -```bash -kubectl exec -i -t tdengine-0 -- taos -s "show dnodes" -kubectl exec -i -t tdengine-1 -- taos -s "show dnodes" -kubectl exec -i -t tdengine-2 -- taos -s "show dnodes" +```Bash +kubectl exec -it tdengine-0 -n tdengine-test -- taos -s "show dnodes" +kubectl exec -it tdengine-1 -n tdengine-test -- taos -s "show dnodes" +kubectl exec -it tdengine-2 -n tdengine-test -- taos -s "show dnodes" ``` 输出如下: -``` +```Bash taos> show dnodes - id | endpoint | vnodes | support_vnodes | status | create_time | note | -============================================================================================================================================ 
- 1 | tdengine-0.taosd.default.sv... | 0 | 256 | ready | 2022-08-10 13:14:57.285 | | - 2 | tdengine-1.taosd.default.sv... | 0 | 256 | ready | 2022-08-10 13:15:11.302 | | - 3 | tdengine-2.taosd.default.sv... | 0 | 256 | ready | 2022-08-10 13:15:23.290 | | -Query OK, 3 rows in database (0.003655s) + id | endpoint | vnodes | support_vnodes | status | create_time | reboot_time | note | active_code | c_active_code | +============================================================================================================================================================================================================================================= + 1 | tdengine-0.ta... | 0 | 16 | ready | 2023-07-19 17:54:18.552 | 2023-07-19 17:54:18.469 | | | | + 2 | tdengine-1.ta... | 0 | 16 | ready | 2023-07-19 17:54:37.828 | 2023-07-19 17:54:38.698 | | | | + 3 | tdengine-2.ta... | 0 | 16 | ready | 2023-07-19 17:55:01.141 | 2023-07-19 17:55:02.039 | | | | +Query OK, 3 row(s) in set (0.001853s) +``` + +查看当前mnode + +```Bash +kubectl exec -it tdengine-1 -n tdengine-test -- taos -s "show mnodes\G" +taos> show mnodes\G +*************************** 1.row *************************** + id: 1 + endpoint: tdengine-0.taosd.tdengine-test.svc.cluster.local:6030 + role: leader + status: ready +create_time: 2023-07-19 17:54:18.559 +reboot_time: 2023-07-19 17:54:19.520 +Query OK, 1 row(s) in set (0.001282s) +``` + +## 创建mnode + +```Bash +kubectl exec -it tdengine-0 -n tdengine-test -- taos -s "create mnode on dnode 2" +kubectl exec -it tdengine-0 -n tdengine-test -- taos -s "create mnode on dnode 3" +``` + +查看mnode + +```Bash +kubectl exec -it tdengine-1 -n tdengine-test -- taos -s "show mnodes\G" + +taos> show mnodes\G +*************************** 1.row *************************** + id: 1 + endpoint: tdengine-0.taosd.tdengine-test.svc.cluster.local:6030 + role: leader + status: ready +create_time: 2023-07-19 17:54:18.559 +reboot_time: 2023-07-20 09:19:36.060 +*************************** 2.row 
*************************** + id: 2 + endpoint: tdengine-1.taosd.tdengine-test.svc.cluster.local:6030 + role: follower + status: ready +create_time: 2023-07-20 09:22:05.600 +reboot_time: 2023-07-20 09:22:12.838 +*************************** 3.row *************************** + id: 3 + endpoint: tdengine-2.taosd.tdengine-test.svc.cluster.local:6030 + role: follower + status: ready +create_time: 2023-07-20 09:22:20.042 +reboot_time: 2023-07-20 09:22:23.271 +Query OK, 3 row(s) in set (0.003108s) ``` ## 使能端口转发 利用 kubectl 端口转发功能可以使应用可以访问 Kubernetes 环境运行的 TDengine 集群。 -``` -kubectl port-forward tdengine-0 6041:6041 & +```Plain +kubectl port-forward -n tdengine-test tdengine-0 6041:6041 & ``` 使用 curl 命令验证 TDengine REST API 使用的 6041 接口。 -``` -$ curl -u root:taosdata -d "show databases" 127.0.0.1:6041/rest/sql -Handling connection for 6041 -{"code":0,"column_meta":[["name","VARCHAR",64],["create_time","TIMESTAMP",8],["vgroups","SMALLINT",2],["ntables","BIGINT",8],["replica","TINYINT",1],["strict","VARCHAR",4],["duration","VARCHAR",10],["keep","VARCHAR",32],["buffer","INT",4],["pagesize","INT",4],["pages","INT",4],["minrows","INT",4],["maxrows","INT",4],["comp","TINYINT",1],["precision","VARCHAR",2],["status","VARCHAR",10],["retention","VARCHAR",60],["single_stable","BOOL",1],["cachemodel","VARCHAR",11],["cachesize","INT",4],["wal_level","TINYINT",1],["wal_fsync_period","INT",4],["wal_retention_period","INT",4],["wal_retention_size","BIGINT",8]],"data":[["information_schema",null,null,16,null,null,null,null,null,null,null,null,null,null,null,"ready",null,null,null,null,null,null,null,null,null,null],["performance_schema",null,null,10,null,null,null,null,null,null,null,null,null,null,null,"ready",null,null,null,null,null,null,null,null,null,null]],"rows":2} +```Plain +curl -u root:taosdata -d "show databases" 127.0.0.1:6041/rest/sql +{"code":0,"column_meta":[["name","VARCHAR",64]],"data":[["information_schema"],["performance_schema"],["test"],["test1"]],"rows":4} ``` -## 使用 
dashboard 进行图形化管理 +## 集群测试 - minikube 提供 dashboard 命令支持图形化管理界面。 +### 数据准备 -``` -$ minikube dashboard -* Verifying dashboard health ... -* Launching proxy ... -* Verifying proxy health ... -* Opening http://127.0.0.1:46617/api/v1/namespaces/kubernetes-dashboard/services/http:kubernetes-dashboard:/proxy/ in your default browser... -http://127.0.0.1:46617/api/v1/namespaces/kubernetes-dashboard/services/http:kubernetes-dashboard:/proxy/ -``` +#### taosBenchmark -对于某些公有云环境,minikube 绑定在 127.0.0.1 IP 地址上无法通过远程访问,需要使用 kubectl proxy 命令将端口映射到 0.0.0.0 IP 地址上,再通过浏览器访问虚拟机公网 IP 和端口以及相同的 dashboard URL 路径即可远程访问 dashboard。 +通过taosBenchmark 创建一个3副本的数据库,同时写入1亿条数据,同时查看数据 +```Bash +kubectl exec -it tdengine-0 -n tdengine-test -- taosBenchmark -I stmt -d test -n 10000 -t 10000 -a 3 + +# query data +kubectl exec -it tdengine-0 -n tdengine-test -- taos -s "select count(*) from test.meters;" + +taos> select count(*) from test.meters; + count(*) | +======================== + 100000000 | +Query OK, 1 row(s) in set (0.103537s) ``` -$ kubectl proxy --accept-hosts='^.*$' --address='0.0.0.0' -``` + +查看vnode分布,通过show dnodes + +```Bash +kubectl exec -it tdengine-0 -n tdengine-test -- taos -s "show dnodes" + +taos> show dnodes + id | endpoint | vnodes | support_vnodes | status | create_time | reboot_time | note | active_code | c_active_code | +============================================================================================================================================================================================================================================= + 1 | tdengine-0.ta... | 8 | 16 | ready | 2023-07-19 17:54:18.552 | 2023-07-19 17:54:18.469 | | | | + 2 | tdengine-1.ta... | 8 | 16 | ready | 2023-07-19 17:54:37.828 | 2023-07-19 17:54:38.698 | | | | + 3 | tdengine-2.ta... 
| 8 | 16 | ready | 2023-07-19 17:55:01.141 | 2023-07-19 17:55:02.039 | | | | +Query OK, 3 row(s) in set (0.001357s) +``` + +通过 show vgroups 查看 vnode 分布情况 + +```Bash +kubectl exec -it tdengine-0 -n tdengine-test -- taos -s "show test.vgroups" + +taos> show test.vgroups + vgroup_id | db_name | tables | v1_dnode | v1_status | v2_dnode | v2_status | v3_dnode | v3_status | v4_dnode | v4_status | cacheload | cacheelements | tsma | +============================================================================================================================================================================================== + 2 | test | 1267 | 1 | follower | 2 | follower | 3 | leader | NULL | NULL | 0 | 0 | 0 | + 3 | test | 1215 | 1 | follower | 2 | leader | 3 | follower | NULL | NULL | 0 | 0 | 0 | + 4 | test | 1215 | 1 | leader | 2 | follower | 3 | follower | NULL | NULL | 0 | 0 | 0 | + 5 | test | 1307 | 1 | follower | 2 | leader | 3 | follower | NULL | NULL | 0 | 0 | 0 | + 6 | test | 1245 | 1 | follower | 2 | follower | 3 | leader | NULL | NULL | 0 | 0 | 0 | + 7 | test | 1275 | 1 | follower | 2 | leader | 3 | follower | NULL | NULL | 0 | 0 | 0 | + 8 | test | 1231 | 1 | leader | 2 | follower | 3 | follower | NULL | NULL | 0 | 0 | 0 | + 9 | test | 1245 | 1 | follower | 2 | follower | 3 | leader | NULL | NULL | 0 | 0 | 0 | +Query OK, 8 row(s) in set (0.001488s) +``` + +#### 手工创建 + +创建一个三副本的数据库 test1,并创建一张表,写入 2 条数据 + +```Bash +kubectl exec -it tdengine-0 -n tdengine-test -- \ + taos -s \ + "create database if not exists test1 replica 3; + use test1; + create table if not exists t1(ts timestamp, n int); + insert into t1 values(now, 1)(now+1s, 2);" +``` + +通过 show test1.vgroups 查看 vnode 分布情况 + +```Bash +kubectl exec -it tdengine-0 -n tdengine-test -- taos -s "show test1.vgroups" + +taos> show test1.vgroups + vgroup_id | db_name | tables | v1_dnode | v1_status | v2_dnode | v2_status | v3_dnode | v3_status | v4_dnode | v4_status | cacheload | cacheelements | tsma |
+============================================================================================================================================================================================== + 10 | test1 | 1 | 1 | follower | 2 | follower | 3 | leader | NULL | NULL | 0 | 0 | 0 | + 11 | test1 | 0 | 1 | follower | 2 | leader | 3 | follower | NULL | NULL | 0 | 0 | 0 | +Query OK, 2 row(s) in set (0.001489s) +``` + +### 容错测试 + +Mnode leader 所在的 dnode 掉线,dnode1 + +```Bash +kubectl get pod -l app=tdengine -n tdengine-test -o wide +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +tdengine-0 0/1 ErrImagePull 2 (2s ago) 20m 10.244.2.75 node86 +tdengine-1 1/1 Running 1 (6m48s ago) 20m 10.244.0.59 node84 +tdengine-2 1/1 Running 0 21m 10.244.1.223 node85 +``` + +此时集群 mnode 发生重新选举,dnode2 上的 mnode 成为 leader + +```Bash +kubectl exec -it tdengine-1 -n tdengine-test -- taos -s "show mnodes\G" +Welcome to the TDengine Command Line Interface, Client Version:3.0.7.1.202307190706 +Copyright (c) 2022 by TDengine, all rights reserved.
+ +taos> show mnodes\G +*************************** 1.row *************************** + id: 1 + endpoint: tdengine-0.taosd.tdengine-test.svc.cluster.local:6030 + role: offline + status: offline +create_time: 2023-07-19 17:54:18.559 +reboot_time: 1970-01-01 08:00:00.000 +*************************** 2.row *************************** + id: 2 + endpoint: tdengine-1.taosd.tdengine-test.svc.cluster.local:6030 + role: leader + status: ready +create_time: 2023-07-20 09:22:05.600 +reboot_time: 2023-07-20 09:32:00.227 +*************************** 3.row *************************** + id: 3 + endpoint: tdengine-2.taosd.tdengine-test.svc.cluster.local:6030 + role: follower + status: ready +create_time: 2023-07-20 09:22:20.042 +reboot_time: 2023-07-20 09:32:00.026 +Query OK, 3 row(s) in set (0.001513s) +``` + +集群可以正常读写 + +```Bash +# insert +kubectl exec -it tdengine-0 -n tdengine-test -- taos -s "insert into test1.t1 values(now, 1)(now+1s, 2);" + +taos> insert into test1.t1 values(now, 1)(now+1s, 2); +Insert OK, 2 row(s) affected (0.002098s) + +# select +kubectl exec -it tdengine-0 -n tdengine-test -- taos -s "select *from test1.t1" + +taos> select *from test1.t1 + ts | n | +======================================== + 2023-07-19 18:04:58.104 | 1 | + 2023-07-19 18:04:59.104 | 2 | + 2023-07-19 18:06:00.303 | 1 | + 2023-07-19 18:06:01.303 | 2 | +Query OK, 4 row(s) in set (0.001994s) +``` + +同理,如果是非 leader 的 mnode 掉线,读写同样可以正常进行,这里就不做过多的展示。 ## 集群扩容 TDengine 集群支持自动扩容: -```bash +```Bash kubectl scale statefulsets tdengine --replicas=4 ``` 上面命令行中参数 `--replica=4` 表示要将 TDengine 集群扩容到 4 个节点,执行后首先检查 POD 的状态: -```bash -kubectl get pods -l app=tdengine +```Bash +kubectl get pod -l app=tdengine -n tdengine-test -o wide ``` 输出如下: -``` -NAME READY STATUS RESTARTS AGE -tdengine-0 1/1 Running 0 161m -tdengine-1 1/1 Running 0 161m -tdengine-2 1/1 Running 0 32m -tdengine-3 1/1 Running 0 32m +```Plain +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +tdengine-0 1/1 Running 4 (6h26m
ago) 6h53m 10.244.2.75 node86 +tdengine-1 1/1 Running 1 (6h39m ago) 6h53m 10.244.0.59 node84 +tdengine-2 1/1 Running 0 5h16m 10.244.1.224 node85 +tdengine-3 1/1 Running 0 3m24s 10.244.2.76 node86 ``` -此时 POD 的状态仍然是 Running,TDengine 集群中的 dnode 状态要等 POD 状态为 `ready` 之后才能看到: +此时 Pod 的状态仍然是 Running,TDengine 集群中的 dnode 状态要等 Pod 状态为 `ready` 之后才能看到: -```bash -kubectl exec -i -t tdengine-3 -- taos -s "show dnodes" +```Bash +kubectl exec -it tdengine-3 -n tdengine-test -- taos -s "show dnodes" ``` 扩容后的四节点 TDengine 集群的 dnode 列表: -``` +```Plain taos> show dnodes - id | endpoint | vnodes | support_vnodes | status | create_time | note | -============================================================================================================================================ - 1 | tdengine-0.taosd.default.sv... | 0 | 256 | ready | 2022-08-10 13:14:57.285 | | - 2 | tdengine-1.taosd.default.sv... | 0 | 256 | ready | 2022-08-10 13:15:11.302 | | - 3 | tdengine-2.taosd.default.sv... | 0 | 256 | ready | 2022-08-10 13:15:23.290 | | - 4 | tdengine-3.taosd.default.sv... | 0 | 256 | ready | 2022-08-10 13:33:16.039 | | -Query OK, 4 rows in database (0.008377s) + id | endpoint | vnodes | support_vnodes | status | create_time | reboot_time | note | active_code | c_active_code | +============================================================================================================================================================================================================================================= + 1 | tdengine-0.ta... | 10 | 16 | ready | 2023-07-19 17:54:18.552 | 2023-07-20 09:39:04.297 | | | | + 2 | tdengine-1.ta... | 10 | 16 | ready | 2023-07-19 17:54:37.828 | 2023-07-20 09:28:24.240 | | | | + 3 | tdengine-2.ta... | 10 | 16 | ready | 2023-07-19 17:55:01.141 | 2023-07-20 10:48:43.445 | | | | + 4 | tdengine-3.ta... 
| 0 | 16 | ready | 2023-07-20 16:01:44.007 | 2023-07-20 16:01:44.889 | | | | +Query OK, 4 row(s) in set (0.003628s) ``` ## 集群缩容 -由于 TDengine 集群在扩缩容时会对数据进行节点间迁移,使用 kubectl 命令进行缩容需要首先使用 "drop dnodes" 命令,节点删除完成后再进行 Kubernetes 集群缩容。 +由于 TDengine 集群在扩缩容时会对数据进行节点间迁移,使用 kubectl 命令进行缩容需要首先使用 "drop dnodes" 命令(**如果集群中存在 3 副本的 db,那么缩容后的 dnode 个数也必须大于等于 3,否则 drop dnode 操作会被中止**),然后在节点删除完成后再进行 Kubernetes 集群缩容。 注意:由于 Kubernetes Statefulset 中 Pod 的只能按创建顺序逆序移除,所以 TDengine drop dnode 也需要按照创建顺序逆序移除,否则会导致 Pod 处于错误状态。 -``` -$ kubectl exec -i -t tdengine-0 -- taos -s "drop dnode 4" -``` - -```bash -$ kubectl exec -it tdengine-0 -- taos -s "show dnodes" +```Bash +kubectl exec -it tdengine-0 -n tdengine-test -- taos -s "drop dnode 4" +kubectl exec -it tdengine-0 -n tdengine-test -- taos -s "show dnodes" taos> show dnodes - id | endpoint | vnodes | support_vnodes | status | create_time | note | -============================================================================================================================================ - 1 | tdengine-0.taosd.default.sv... | 0 | 256 | ready | 2022-08-10 13:14:57.285 | | - 2 | tdengine-1.taosd.default.sv... | 0 | 256 | ready | 2022-08-10 13:15:11.302 | | - 3 | tdengine-2.taosd.default.sv... | 0 | 256 | ready | 2022-08-10 13:15:23.290 | | -Query OK, 3 rows in database (0.004861s) + id | endpoint | vnodes | support_vnodes | status | create_time | reboot_time | note | active_code | c_active_code | +============================================================================================================================================================================================================================================= + 1 | tdengine-0.ta... | 10 | 16 | ready | 2023-07-19 17:54:18.552 | 2023-07-20 09:39:04.297 | | | | + 2 | tdengine-1.ta... | 10 | 16 | ready | 2023-07-19 17:54:37.828 | 2023-07-20 09:28:24.240 | | | | + 3 | tdengine-2.ta...
| 10 | 16 | ready | 2023-07-19 17:55:01.141 | 2023-07-20 10:48:43.445 | | | | +Query OK, 3 row(s) in set (0.003324s) ``` 确认移除成功后(使用 kubectl exec -i -t tdengine-0 -- taos -s "show dnodes" 查看和确认 dnode 列表),使用 kubectl 命令移除 POD: -``` -kubectl scale statefulsets tdengine --replicas=3 +```Plain +kubectl scale statefulsets tdengine --replicas=3 -n tdengine-test ``` 最后一个 POD 将会被删除。使用命令 kubectl get pods -l app=tdengine 查看POD状态: -``` -$ kubectl get pods -l app=tdengine -NAME READY STATUS RESTARTS AGE -tdengine-0 1/1 Running 0 4m7s -tdengine-1 1/1 Running 0 3m55s -tdengine-2 1/1 Running 0 2m28s +```Plain +kubectl get pod -l app=tdengine -n tdengine-test -o wide +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +tdengine-0 1/1 Running 4 (6h55m ago) 7h22m 10.244.2.75 node86 +tdengine-1 1/1 Running 1 (7h9m ago) 7h23m 10.244.0.59 node84 +tdengine-2 1/1 Running 0 5h45m 10.244.1.224 node85 ``` POD删除后,需要手动删除PVC,否则下次扩容时会继续使用以前的数据导致无法正常加入集群。 -```bash -$ kubectl delete pvc taosdata-tdengine-3 +```Bash +kubectl delete pvc taosdata-tdengine-3 -n tdengine-test ``` 此时的集群状态是安全的,需要时还可以再次进行扩容: -```bash -$ kubectl scale statefulsets tdengine --replicas=4 +```Bash +kubectl scale statefulsets tdengine --replicas=4 -n tdengine-test statefulset.apps/tdengine scaled -it@k8s-2:~/TDengine-Operator/src/tdengine$ kubectl get pods -l app=tdengine -NAME READY STATUS RESTARTS AGE -tdengine-0 1/1 Running 0 35m -tdengine-1 1/1 Running 0 34m -tdengine-2 1/1 Running 0 12m -tdengine-3 0/1 ContainerCreating 0 4s -it@k8s-2:~/TDengine-Operator/src/tdengine$ kubectl get pods -l app=tdengine -NAME READY STATUS RESTARTS AGE -tdengine-0 1/1 Running 0 35m -tdengine-1 1/1 Running 0 34m -tdengine-2 1/1 Running 0 12m -tdengine-3 0/1 Running 0 7s -it@k8s-2:~/TDengine-Operator/src/tdengine$ kubectl exec -it tdengine-0 -- taos -s "show dnodes" + +kubectl get pod -l app=tdengine -n tdengine-test -o wide +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +tdengine-0 1/1 Running 4 (6h59m
ago) 7h27m 10.244.2.75 node86 +tdengine-1 1/1 Running 1 (7h13m ago) 7h27m 10.244.0.59 node84 +tdengine-2 1/1 Running 0 5h49m 10.244.1.224 node85 +tdengine-3 1/1 Running 0 20s 10.244.2.77 node86 + +kubectl exec -it tdengine-0 -n tdengine-test -- taos -s "show dnodes" taos> show dnodes -id | endpoint | vnodes | support_vnodes | status | create_time | offline reason | -====================================================================================================================================== -1 | tdengine-0.taosd.default.sv... | 0 | 4 | ready | 2022-07-25 17:38:49.012 | | -2 | tdengine-1.taosd.default.sv... | 1 | 4 | ready | 2022-07-25 17:39:01.517 | | -5 | tdengine-2.taosd.default.sv... | 0 | 4 | ready | 2022-07-25 18:01:36.479 | | -6 | tdengine-3.taosd.default.sv... | 0 | 4 | ready | 2022-07-25 18:13:54.411 | | -Query OK, 4 row(s) in set (0.001348s) + id | endpoint | vnodes | support_vnodes | status | create_time | reboot_time | note | active_code | c_active_code | +============================================================================================================================================================================================================================================= + 1 | tdengine-0.ta... | 10 | 16 | ready | 2023-07-19 17:54:18.552 | 2023-07-20 09:39:04.297 | | | | + 2 | tdengine-1.ta... | 10 | 16 | ready | 2023-07-19 17:54:37.828 | 2023-07-20 09:28:24.240 | | | | + 3 | tdengine-2.ta... | 10 | 16 | ready | 2023-07-19 17:55:01.141 | 2023-07-20 10:48:43.445 | | | | + 5 | tdengine-3.ta... 
| 0 | 16 | ready | 2023-07-20 16:31:34.092 | 2023-07-20 16:38:17.419 | | | | +Query OK, 4 row(s) in set (0.003881s) ``` ## 清理 TDengine 集群 -完整移除 TDengine 集群,需要分别清理 statefulset、svc、configmap、pvc。 +> **删除pvc时需要注意下pv persistentVolumeReclaimPolicy策略,建议改为Delete,这样在删除pvc时才会自动清理pv,同时会清理底层的csi存储资源,如果没有配置删除pvc自动清理pv的策略,再删除pvc后,在手动清理pv时,pv对应的csi存储资源可能不会被释放。** -```bash -kubectl delete statefulset -l app=tdengine -kubectl delete svc -l app=tdengine -kubectl delete pvc -l app=tdengine -kubectl delete configmap taoscfg +完整移除 TDengine 集群,需要分别清理 statefulset、svc、configmap、pvc。 +```Bash +kubectl delete statefulset -l app=tdengine -n tdengine-test +kubectl delete svc -l app=tdengine -n tdengine-test +kubectl delete pvc -l app=tdengine -n tdengine-test +kubectl delete configmap taoscfg -n tdengine-test ``` ## 常见错误 @@ -330,65 +519,26 @@ kubectl delete configmap taoscfg 未进行 "drop dnode" 直接进行缩容,由于 TDengine 尚未删除节点,缩容 pod 导致 TDengine 集群中部分节点处于 offline 状态。 -``` -$ kubectl exec -it tdengine-0 -- taos -s "show dnodes" +```Plain +kubectl exec -it tdengine-0 -n tdengine-test -- taos -s "show dnodes" taos> show dnodes -id | endpoint | vnodes | support_vnodes | status | create_time | offline reason | -====================================================================================================================================== -1 | tdengine-0.taosd.default.sv... | 0 | 4 | ready | 2022-07-25 17:38:49.012 | | -2 | tdengine-1.taosd.default.sv... | 1 | 4 | ready | 2022-07-25 17:39:01.517 | | -5 | tdengine-2.taosd.default.sv... | 0 | 4 | offline | 2022-07-25 18:01:36.479 | status msg timeout | -6 | tdengine-3.taosd.default.sv... 
| 0 | 4 | offline | 2022-07-25 18:13:54.411 | status msg timeout | -Query OK, 4 row(s) in set (0.001323s) -``` - -### 错误二 - -TDengine 集群会持有 replica 参数,如果缩容后的节点数小于这个值,集群将无法使用: - -创建一个库使用 replica 参数为 2,插入部分数据: - -```bash -kubectl exec -i -t tdengine-0 -- \ - taos -s \ - "create database if not exists test replica 2; - use test; - create table if not exists t1(ts timestamp, n int); - insert into t1 values(now, 1)(now+1s, 2);" - - -``` - -缩容到单节点: - -```bash -kubectl scale statefulsets tdengine --replicas=1 - -``` - -在 TDengine CLI 中的所有数据库操作将无法成功。 - + id | endpoint | vnodes | support_vnodes | status | create_time | reboot_time | note | active_code | c_active_code | +============================================================================================================================================================================================================================================= + 1 | tdengine-0.ta... | 10 | 16 | ready | 2023-07-19 17:54:18.552 | 2023-07-20 09:39:04.297 | | | | + 2 | tdengine-1.ta... | 10 | 16 | ready | 2023-07-19 17:54:37.828 | 2023-07-20 09:28:24.240 | | | | + 3 | tdengine-2.ta... | 10 | 16 | ready | 2023-07-19 17:55:01.141 | 2023-07-20 10:48:43.445 | | | | + 5 | tdengine-3.ta... | 0 | 16 | offline | 2023-07-20 16:31:34.092 | 2023-07-20 16:38:17.419 | status msg timeout | | | +Query OK, 4 row(s) in set (0.003862s) ``` -taos> show dnodes; - id | end_point | vnodes | cores | status | role | create_time | offline reason | -====================================================================================================================================== - 1 | tdengine-0.taosd.default.sv... | 2 | 40 | ready | any | 2021-06-01 15:55:52.562 | | - 2 | tdengine-1.taosd.default.sv... 
| 1 | 40 | offline | any | 2021-06-01 15:56:07.212 | status msg timeout | -Query OK, 2 row(s) in set (0.000845s) -taos> show dnodes; - id | end_point | vnodes | cores | status | role | create_time | offline reason | -====================================================================================================================================== - 1 | tdengine-0.taosd.default.sv... | 2 | 40 | ready | any | 2021-06-01 15:55:52.562 | | - 2 | tdengine-1.taosd.default.sv... | 1 | 40 | offline | any | 2021-06-01 15:56:07.212 | status msg timeout | -Query OK, 2 row(s) in set (0.000837s) +## 最后 -taos> use test; -Database changed. +对于在 Kubernetes 环境下 TDengine 的高可用和高可靠来说,对于硬件损坏、灾难恢复,分为两个层面来讲: -taos> insert into t1 values(now, 3); +1. 底层的分布式块存储具备的灾难恢复能力,块存储的多副本,当下流行的分布式块存储如 Ceph,就具备多副本能力,将存储副本扩展到不同的机架、机柜、机房、数据中心(或者直接使用公有云厂商提供的块存储服务) +2. TDengine的灾难恢复,在 TDengine Enterprise 中,本身具备了当一个 dnode 永久下线(物理机磁盘损坏,数据分拣丢失)后,重新拉起一个空白的dnode来恢复原dnode的工作。 -DB error: Unable to resolve FQDN (0.013874s) +最后,欢迎使用[TDengine Cloud](https://cloud.taosdata.com/),来体验一站式全托管的TDengine云服务。 -``` +> TDengine Cloud 是一个极简的全托管时序数据处理云服务平台,它是基于开源的时序数据库 TDengine 而开发的。除高性能的时序数据库之外,它还具有缓存、订阅和流计算等系统功能,而且提供了便利而又安全的数据分享、以及众多的企业级功能。它可以让物联网、工业互联网、金融、IT 运维监控等领域企业在时序数据的管理上大幅降低人力成本和运营成本。 diff --git a/docs/zh/12-taos-sql/01-data-type.md b/docs/zh/12-taos-sql/01-data-type.md index 4a4c1d6ec69b95e8cf3c38b5b5a8d2a4cb335d62..1df07e7e7f009d202c6fe71a1940dc6ab388d845 100644 --- a/docs/zh/12-taos-sql/01-data-type.md +++ b/docs/zh/12-taos-sql/01-data-type.md @@ -42,12 +42,21 @@ CREATE DATABASE db_name PRECISION 'ns'; | 14 | NCHAR | 自定义 | 记录包含多字节字符在内的字符串,如中文字符。每个 NCHAR 字符占用 4 字节的存储空间。字符串两端使用单引号引用,字符串内的单引号需用转义字符 `\'`。NCHAR 使用时须指定字符串大小,类型为 NCHAR(10) 的列表示此列的字符串最多存储 10 个 NCHAR 字符。如果用户字符串长度超出声明长度,将会报错。 | | 15 | JSON | | JSON 数据类型, 只有 Tag 可以是 JSON 格式 | | 16 | VARCHAR | 自定义 | BINARY 类型的别名 | +| 17 | GEOMETRY | 自定义 | 几何类型 | :::note -- 表的每行长度不能超过 48KB(从 3.0.5.0 版本开始为 64KB)(注意:每个 BINARY/NCHAR 类型的列还会额外占用 2 个字节的存储位置)。 +- 表的每行长度不能超过 48KB(从 
3.0.5.0 版本开始为 64KB)(注意:每个 BINARY/NCHAR/GEOMETRY 类型的列还会额外占用 2 个字节的存储位置)。 - 虽然 BINARY 类型在底层存储上支持字节型的二进制字符,但不同编程语言对二进制数据的处理方式并不保证一致,因此建议在 BINARY 类型中只存储 ASCII 可见字符,而避免存储不可见字符。多字节的数据,例如中文字符,则需要使用 NCHAR 类型进行保存。如果强行使用 BINARY 类型保存中文字符,虽然有时也能正常读写,但并不带有字符集信息,很容易出现数据乱码甚至数据损坏等情况。 - BINARY 类型理论上最长可以有 16,374(从 3.0.5.0 版本开始,数据列为 65,517,标签列为 16,382) 字节。BINARY 仅支持字符串输入,字符串两端需使用单引号引用。使用时须指定大小,如 BINARY(20) 定义了最长为 20 个单字节字符的字符串,每个字符占 1 字节的存储空间,总共固定占用 20 字节的空间,此时如果用户字符串超出 20 字节将会报错。对于字符串内的单引号,可以用转义字符反斜线加单引号来表示,即 `\'`。 +- GEOMETRY 类型数据列为最大长度为 65,517 字节,标签列最大长度为 16,382 字节。支持 2D 的 POINT、LINESTRING 和 POLYGON 子类型数据。长度计算方式如下表所示: + + | # | **语法** | **最小长度** | **最大长度** | **每组坐标长度增长** | + |---|--------------------------------------|----------|------------|--------------| + | 1 | POINT(1.0 1.0) | 21 | 21 | 无 | + | 2 | LINESTRING(1.0 1.0, 2.0 2.0) | 9+2*16 | 9+4094*16 | +16 | + | 3 | POLYGON((1.0 1.0, 2.0 2.0, 1.0 1.0)) | 13+3*16 | 13+4094*16 | +16 | + - SQL 语句中的数值类型将依据是否存在小数点,或使用科学计数法表示,来判断数值类型是否为整型或者浮点型,因此在使用时要注意相应类型越界的情况。例如,9999999999999999999 会认为超过长整型的上边界而溢出,而 9999999999999999999.0 会被认为是有效的浮点数。 ::: diff --git a/docs/zh/12-taos-sql/02-database.md b/docs/zh/12-taos-sql/02-database.md index de104b68343e9013637d4befe5906357f5d951da..ac435debea6bc2652f9ba0b30cadb90866ee1d22 100644 --- a/docs/zh/12-taos-sql/02-database.md +++ b/docs/zh/12-taos-sql/02-database.md @@ -73,7 +73,7 @@ database_option: { - TABLE_PREFIX:当其为正值时,在决定把一个表分配到哪个 vgroup 时要忽略表名中指定长度的前缀;当其为负值时,在决定把一个表分配到哪个 vgroup 时只使用表名中指定长度的前缀;例如,假定表名为 "v30001",当 TSDB_PREFIX = 2 时 使用 "0001" 来决定分配到哪个 vgroup ,当 TSDB_PREFIX = -2 时使用 "v3" 来决定分配到哪个 vgroup - TABLE_SUFFIX:当其为正值时,在决定把一个表分配到哪个 vgroup 时要忽略表名中指定长度的后缀;当其为负值时,在决定把一个表分配到哪个 vgroup 时只使用表名中指定长度的后缀;例如,假定表名为 "v30001",当 TSDB_SUFFIX = 2 时 使用 "v300" 来决定分配到哪个 vgroup ,当 TSDB_SUFFIX = -2 时使用 "01" 来决定分配到哪个 vgroup。 - TSDB_PAGESIZE:一个 VNODE 中时序数据存储引擎的页大小,单位为 KB,默认为 4 KB。范围为 1 到 16384,即 1 KB到 16 MB。 -- WAL_RETENTION_PERIOD: 为了数据订阅消费,需要WAL日志文件额外保留的最大时长策略。WAL日志清理,不受订阅客户端消费状态影响。单位为 s。默认为 
0,表示无需为订阅保留。新建订阅,应先设置恰当的时长策略。 +- WAL_RETENTION_PERIOD: 为了数据订阅消费,需要WAL日志文件额外保留的最大时长策略。WAL日志清理,不受订阅客户端消费状态影响。单位为 s。默认为 3600,表示在 WAL 保留最近 3600 秒的数据,请根据数据订阅的需要修改这个参数为适当值。 - WAL_RETENTION_SIZE:为了数据订阅消费,需要WAL日志文件额外保留的最大累计大小策略。单位为 KB。默认为 0,表示累计大小无上限。 ### 创建数据库示例 @@ -82,7 +82,7 @@ create database if not exists db vgroups 10 buffer 10 ``` -以上示例创建了一个有 10 个 vgroup 名为 db 的数据库, 其中每个 vnode 分配也 10MB 的写入缓存 +以上示例创建了一个有 10 个 vgroup 名为 db 的数据库, 其中每个 vnode 分配 10MB 的写入缓存 ### 使用数据库 diff --git a/docs/zh/12-taos-sql/03-table.md b/docs/zh/12-taos-sql/03-table.md index 2e66ac4002f1d535615893d8ddb04f163aa7a498..9258258263282c6d3f9df5aa8dc650ec511a9680 100644 --- a/docs/zh/12-taos-sql/03-table.md +++ b/docs/zh/12-taos-sql/03-table.md @@ -43,12 +43,11 @@ table_option: { 1. 表的第一个字段必须是 TIMESTAMP,并且系统自动将其设为主键; 2. 表名最大长度为 192; -3. 表的每行长度不能超过 48KB(从 3.0.5.0 版本开始为 64KB);(注意:每个 BINARY/NCHAR 类型的列还会额外占用 2 个字节的存储位置) +3. 表的每行长度不能超过 48KB(从 3.0.5.0 版本开始为 64KB);(注意:每个 BINARY/NCHAR/GEOMETRY 类型的列还会额外占用 2 个字节的存储位置) 4. 子表名只能由字母、数字和下划线组成,且不能以数字开头,不区分大小写 -5. 使用数据类型 binary 或 nchar,需指定其最长的字节数,如 binary(20),表示 20 字节; +5. 使用数据类型 BINARY/NCHAR/GEOMETRY,需指定其最长的字节数,如 BINARY(20),表示 20 字节; 6. 
为了兼容支持更多形式的表名,TDengine 引入新的转义符 "\`",可以让表名与关键词不冲突,同时不受限于上述表名称合法性约束检查。但是同样具有长度限制要求。使用转义字符以后,不再对转义字符中的内容进行大小写统一。 例如:\`aBc\` 和 \`abc\` 是不同的表名,但是 abc 和 aBc 是相同的表名。 - 需要注意的是转义字符中的内容必须是可打印字符。 **参数说明** diff --git a/docs/zh/12-taos-sql/16-operators.md b/docs/zh/12-taos-sql/16-operators.md index 48e9991799abf99ca868fc30e34f0435054afa0b..0636121edda2d2cb25e54861db4f18b45c2491ee 100644 --- a/docs/zh/12-taos-sql/16-operators.md +++ b/docs/zh/12-taos-sql/16-operators.md @@ -39,7 +39,7 @@ TDengine 支持 `UNION ALL` 和 `UNION` 操作符。UNION ALL 将查询返回的 | 3 | \>, < | 除 BLOB、MEDIUMBLOB 和 JSON 外的所有类型 | 大于,小于 | | 4 | \>=, <= | 除 BLOB、MEDIUMBLOB 和 JSON 外的所有类型 | 大于等于,小于等于 | | 5 | IS [NOT] NULL | 所有类型 | 是否为空值 | -| 6 | [NOT] BETWEEN AND | 除 BOOL、BLOB、MEDIUMBLOB 和 JSON 外的所有类型 | 闭区间比较 | +| 6 | [NOT] BETWEEN AND | 除 BOOL、BLOB、MEDIUMBLOB、JSON 和 GEOMETRY 外的所有类型 | 闭区间比较 | | 7 | IN | 除 BLOB、MEDIUMBLOB 和 JSON 外的所有类型,且不可以为表的时间戳主键列 | 与列表内的任意值相等 | | 8 | LIKE | BINARY、NCHAR 和 VARCHAR | 通配符匹配 | | 9 | MATCH, NMATCH | BINARY、NCHAR 和 VARCHAR | 正则表达式匹配 | diff --git a/docs/zh/12-taos-sql/19-limit.md b/docs/zh/12-taos-sql/19-limit.md index 6c815fc5f0fe8967fe3ae0bf350a16a2e86ded62..73107bf3baecfe38d3c3a4cda8ffdc5d3df89a08 100644 --- a/docs/zh/12-taos-sql/19-limit.md +++ b/docs/zh/12-taos-sql/19-limit.md @@ -10,11 +10,9 @@ description: 合法字符集和命名中的限制规则 2. 允许英文字符或下划线开头,不允许以数字开头 3. 不区分大小写 4. 
转义后表(列)名规则: - 为了兼容支持更多形式的表(列)名,TDengine 引入新的转义符 "`"。可用让表名与关键词不冲突,同时不受限于上述表名称合法性约束检查 - 转义后的表(列)名同样受到长度限制要求,且长度计算的时候不计算转义符。使用转义字符以后,不再对转义字符中的内容进行大小写统一 + 为了兼容支持更多形式的表(列)名,TDengine 引入新的转义符 "`"。使用转义字符以后,不再对转义字符中的内容进行大小写统一,即可以保留用户指定表名中的大小写属性。 例如:\`aBc\` 和 \`abc\` 是不同的表(列)名,但是 abc 和 aBc 是相同的表(列)名。 - 需要注意的是转义字符中的内容必须是可打印字符。 ## 密码合法字符集 @@ -48,13 +46,13 @@ description: 合法字符集和命名中的限制规则 ### 转义后表(列)名规则: -为了兼容支持更多形式的表(列)名,TDengine 引入新的转义符 "`",可以避免表名与关键词的冲突,同时不受限于上述表名合法性约束检查,转义符不计入表名的长度。 +为了兼容支持更多形式的表(列)名,TDengine 引入新的转义符 "`",可以避免表名与关键词的冲突,转义符不计入表名的长度。 转义后的表(列)名同样受到长度限制要求,且长度计算的时候不计算转义符。使用转义字符以后,不再对转义字符中的内容进行大小写统一。 例如: \`aBc\` 和 \`abc\` 是不同的表(列)名,但是 abc 和 aBc 是相同的表(列)名。 :::note -转义字符中的内容必须是可打印字符。 +转义字符中的内容必须符合命名规则中的字符约束。 ::: diff --git a/docs/zh/12-taos-sql/22-meta.md b/docs/zh/12-taos-sql/22-meta.md index c0d3db67d30befe050c1d15eb5e66324549603f7..35794ec2699eba8111096022e04632853cfc3056 100644 --- a/docs/zh/12-taos-sql/22-meta.md +++ b/docs/zh/12-taos-sql/22-meta.md @@ -28,15 +28,15 @@ TDengine 内置了一个名为 `INFORMATION_SCHEMA` 的数据库,提供对数 提供 dnode 的相关信息。也可以使用 SHOW DNODES 来查询这些信息。 -| # | **列名** | **数据类型** | **说明** | -| --- | :------------: | ------------ | ------------------------- | +| # | **列名** | **数据类型** | **说明** | +| --- | :------------: | ------------ | ----------------------------------------------------------------------------------------------------- | | 1 | vnodes | SMALLINT | dnode 中的实际 vnode 个数。需要注意,`vnodes` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 2 | support_vnodes | SMALLINT | 最多支持的 vnode 个数 | -| 3 | status | BINARY(10) | 当前状态 | -| 4 | note | BINARY(256) | 离线原因等信息 | -| 5 | id | SMALLINT | dnode id | -| 6 | endpoint | BINARY(134) | dnode 的地址 | -| 7 | create | TIMESTAMP | 创建时间 | +| 2 | support_vnodes | SMALLINT | 最多支持的 vnode 个数 | +| 3 | status | BINARY(10) | 当前状态 | +| 4 | note | BINARY(256) | 离线原因等信息 | +| 5 | id | SMALLINT | dnode id | +| 6 | endpoint | BINARY(134) | dnode 的地址 | +| 7 | create | TIMESTAMP | 创建时间 | ## INS_MNODES @@ -98,7 +98,7 @@ TDengine 内置了一个名为 
`INFORMATION_SCHEMA` 的数据库,提供对数 | 21 | cachesize | INT | 表示每个 vnode 中用于缓存子表最近数据的内存大小。需要注意,`cachesize` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 22 | wal_level | INT | WAL 级别。需要注意,`wal_level` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 23 | wal_fsync_period | INT | 数据落盘周期。需要注意,`wal_fsync_period` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 24 | wal_retention_period | INT | WAL 的保存时长。需要注意,`wal_retention_period` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 24 | wal_retention_period | INT | WAL 的保存时长,单位为秒。需要注意,`wal_retention_period` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 25 | wal_retention_size | INT | WAL 的保存上限。需要注意,`wal_retention_size` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 26 | stt_trigger | SMALLINT | 触发文件合并的落盘文件的个数。需要注意,`stt_trigger` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 27 | table_prefix | SMALLINT | 内部存储引擎根据表名分配存储该表数据的 VNODE 时要忽略的前缀的长度。需要注意,`table_prefix` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | @@ -109,66 +109,66 @@ TDengine 内置了一个名为 `INFORMATION_SCHEMA` 的数据库,提供对数 用户创建的自定义函数的信息。 -| # | **列名** | **数据类型** | **说明** | -| --- | :---------: | ------------ | -------------- | -| 1 | name | BINARY(64) | 函数名 | -| 2 | comment | BINARY(255) | 补充说明。需要注意,`comment` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 3 | aggregate | INT | 是否为聚合函数。需要注意,`aggregate` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 4 | output_type | BINARY(31) | 输出类型 | -| 5 | create_time | TIMESTAMP | 创建时间 | -| 6 | code_len | INT | 代码长度 | -| 7 | bufsize | INT | buffer 大小 | -| 8 | func_language | BINARY(31) | 自定义函数编程语言 | -| 9 | func_body | BINARY(16384) | 函数体定义 | -| 10 | func_version | INT | 函数版本号。初始版本为0,每次替换更新,版本号加1。| +| # | **列名** | **数据类型** | **说明** | +| --- | :-----------: | ------------- | --------------------------------------------------------------------------------------------- | +| 1 | name | BINARY(64) | 函数名 | +| 2 | comment | BINARY(255) | 补充说明。需要注意,`comment` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 3 | aggregate | INT | 是否为聚合函数。需要注意,`aggregate` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 4 | output_type | BINARY(31) | 输出类型 | +| 5 | 
create_time | TIMESTAMP | 创建时间 | +| 6 | code_len | INT | 代码长度 | +| 7 | bufsize | INT | buffer 大小 | +| 8 | func_language | BINARY(31) | 自定义函数编程语言 | +| 9 | func_body | BINARY(16384) | 函数体定义 | +| 10 | func_version | INT | 函数版本号。初始版本为0,每次替换更新,版本号加1。 | ## INS_INDEXES 提供用户创建的索引的相关信息。也可以使用 SHOW INDEX 来查询这些信息。 -| # | **列名** | **数据类型** | **说明** | -| --- | :--------------: | ------------ | ---------------------------------------------------------------------------------- | -| 1 | db_name | BINARY(32) | 包含此索引的表所在的数据库名 | -| 2 | table_name | BINARY(192) | 包含此索引的表的名称 | -| 3 | index_name | BINARY(192) | 索引名 | -| 4 | column_name | BINARY(64) | 建索引的列的列名 | -| 5 | index_type | BINARY(10) | 目前有 SMA 和 FULLTEXT | -| 6 | index_extensions | BINARY(256) | 索引的额外信息。对 SMA 类型的索引,是函数名的列表。对 FULLTEXT 类型的索引为 NULL。 | +| # | **列名** | **数据类型** | **说明** | +| --- | :--------------: | ------------ | ------------------------------------------------------- | +| 1 | db_name | BINARY(32) | 包含此索引的表所在的数据库名 | +| 2 | table_name | BINARY(192) | 包含此索引的表的名称 | +| 3 | index_name | BINARY(192) | 索引名 | +| 4 | column_name | BINARY(64) | 建索引的列的列名 | +| 5 | index_type | BINARY(10) | 目前有 SMA 和 tag | +| 6 | index_extensions | BINARY(256) | 索引的额外信息。对 SMA/tag 类型的索引,是函数名的列表。 | ## INS_STABLES 提供用户创建的超级表的相关信息。 -| # | **列名** | **数据类型** | **说明** | -| --- | :-----------: | ------------ | ------------------------ | -| 1 | stable_name | BINARY(192) | 超级表表名 | -| 2 | db_name | BINARY(64) | 超级表所在的数据库的名称 | -| 3 | create_time | TIMESTAMP | 创建时间 | -| 4 | columns | INT | 列数目 | -| 5 | tags | INT | 标签数目。需要注意,`tags` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 6 | last_update | TIMESTAMP | 最后更新时间 | -| 7 | table_comment | BINARY(1024) | 表注释 | -| 8 | watermark | BINARY(64) | 窗口的关闭时间。需要注意,`watermark` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 9 | max_delay | BINARY(64) | 推送计算结果的最大延迟。需要注意,`max_delay` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 10 | rollup | BINARY(128) | rollup 聚合函数。需要注意,`rollup` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| # | **列名** | **数据类型** | 
**说明** | +| --- | :-----------: | ------------ | ----------------------------------------------------------------------------------------------------- | +| 1 | stable_name | BINARY(192) | 超级表表名 | +| 2 | db_name | BINARY(64) | 超级表所在的数据库的名称 | +| 3 | create_time | TIMESTAMP | 创建时间 | +| 4 | columns | INT | 列数目 | +| 5 | tags | INT | 标签数目。需要注意,`tags` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 6 | last_update | TIMESTAMP | 最后更新时间 | +| 7 | table_comment | BINARY(1024) | 表注释 | +| 8 | watermark | BINARY(64) | 窗口的关闭时间。需要注意,`watermark` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 9 | max_delay | BINARY(64) | 推送计算结果的最大延迟。需要注意,`max_delay` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 10 | rollup | BINARY(128) | rollup 聚合函数。需要注意,`rollup` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | ## INS_TABLES 提供用户创建的普通表和子表的相关信息 -| # | **列名** | **数据类型** | **说明** | -| --- | :-----------: | ------------ | ---------------- | -| 1 | table_name | BINARY(192) | 表名 | -| 2 | db_name | BINARY(64) | 数据库名 | -| 3 | create_time | TIMESTAMP | 创建时间 | -| 4 | columns | INT | 列数目 | -| 5 | stable_name | BINARY(192) | 所属的超级表表名 | -| 6 | uid | BIGINT | 表 id | -| 7 | vgroup_id | INT | vgroup id | -| 8 | ttl | INT | 表的生命周期。需要注意,`ttl` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 9 | table_comment | BINARY(1024) | 表注释 | -| 10 | type | BINARY(21) | 表类型 | +| # | **列名** | **数据类型** | **说明** | +| --- | :-----------: | ------------ | ------------------------------------------------------------------------------------- | +| 1 | table_name | BINARY(192) | 表名 | +| 2 | db_name | BINARY(64) | 数据库名 | +| 3 | create_time | TIMESTAMP | 创建时间 | +| 4 | columns | INT | 列数目 | +| 5 | stable_name | BINARY(192) | 所属的超级表表名 | +| 6 | uid | BIGINT | 表 id | +| 7 | vgroup_id | INT | vgroup id | +| 8 | ttl | INT | 表的生命周期。需要注意,`ttl` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 9 | table_comment | BINARY(1024) | 表注释 | +| 10 | type | BINARY(21) | 表类型 | ## INS_TAGS @@ -183,17 +183,17 @@ TDengine 内置了一个名为 `INFORMATION_SCHEMA` 的数据库,提供对数 ## INS_COLUMNS -| # | **列名** | **数据类型** | **说明** | 
-| --- | :---------: | ------------- | ---------------------- | -| 1 | table_name | BINARY(192) | 表名 | -| 2 | db_name | BINARY(64) | 该表所在的数据库的名称 | -| 3 | table_type | BINARY(21) | 表类型 | -| 4 | col_name | BINARY(64) | 列 的名称 | -| 5 | col_type | BINARY(32) | 列 的类型 | -| 6 | col_length | INT | 列 的长度 | -| 7 | col_precision | INT | 列 的精度 | -| 8 | col_scale | INT | 列 的比例 | -| 9 | col_nullable | INT | 列 是否可以为空 | +| # | **列名** | **数据类型** | **说明** | +| --- | :-----------: | ------------ | ---------------------- | +| 1 | table_name | BINARY(192) | 表名 | +| 2 | db_name | BINARY(64) | 该表所在的数据库的名称 | +| 3 | table_type | BINARY(21) | 表类型 | +| 4 | col_name | BINARY(64) | 列 的名称 | +| 5 | col_type | BINARY(32) | 列 的类型 | +| 6 | col_length | INT | 列 的长度 | +| 7 | col_precision | INT | 列 的精度 | +| 8 | col_scale | INT | 列 的比例 | +| 9 | col_nullable | INT | 列 是否可以为空 | ## INS_USERS @@ -209,60 +209,60 @@ TDengine 内置了一个名为 `INFORMATION_SCHEMA` 的数据库,提供对数 提供企业版授权的相关信息。 -| # | **列名** | **数据类型** | **说明** | -| --- | :---------: | ------------ | -------------------------------------------------- | -| 1 | version | BINARY(9) | 企业版授权说明:official(官方授权的)/trial(试用的) | -| 2 | cpu_cores | BINARY(9) | 授权使用的 CPU 核心数量 | -| 3 | dnodes | BINARY(10) | 授权使用的 dnode 节点数量。需要注意,`dnodes` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 4 | streams | BINARY(10) | 授权创建的流数量。需要注意,`streams` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 5 | users | BINARY(10) | 授权创建的用户数量。需要注意,`users` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 6 | accounts | BINARY(10) | 授权创建的帐户数量。需要注意,`accounts` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 7 | storage | BINARY(21) | 授权使用的存储空间大小。需要注意,`storage` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 8 | connections | BINARY(21) | 授权使用的客户端连接数量。需要注意,`connections` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 9 | databases | BINARY(11) | 授权使用的数据库数量。需要注意,`databases` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 10 | speed | BINARY(9) | 授权使用的数据点每秒写入数量 | -| 11 | querytime | BINARY(9) | 授权使用的查询总时长 | -| 12 | timeseries | BINARY(21) | 授权使用的测点数量 | -| 13 | 
expired | BINARY(5) | 是否到期,true:到期,false:未到期 | -| 14 | expire_time | BINARY(19) | 试用期到期时间 | +| # | **列名** | **数据类型** | **说明** | +| --- | :---------: | ------------ | --------------------------------------------------------------------------------------------------------- | +| 1 | version | BINARY(9) | 企业版授权说明:official(官方授权的)/trial(试用的) | +| 2 | cpu_cores | BINARY(9) | 授权使用的 CPU 核心数量 | +| 3 | dnodes | BINARY(10) | 授权使用的 dnode 节点数量。需要注意,`dnodes` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 4 | streams | BINARY(10) | 授权创建的流数量。需要注意,`streams` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 5 | users | BINARY(10) | 授权创建的用户数量。需要注意,`users` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 6 | accounts | BINARY(10) | 授权创建的帐户数量。需要注意,`accounts` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 7 | storage | BINARY(21) | 授权使用的存储空间大小。需要注意,`storage` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 8 | connections | BINARY(21) | 授权使用的客户端连接数量。需要注意,`connections` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 9 | databases | BINARY(11) | 授权使用的数据库数量。需要注意,`databases` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 10 | speed | BINARY(9) | 授权使用的数据点每秒写入数量 | +| 11 | querytime | BINARY(9) | 授权使用的查询总时长 | +| 12 | timeseries | BINARY(21) | 授权使用的测点数量 | +| 13 | expired | BINARY(5) | 是否到期,true:到期,false:未到期 | +| 14 | expire_time | BINARY(19) | 试用期到期时间 | ## INS_VGROUPS 系统中所有 vgroups 的信息。 -| # | **列名** | **数据类型** | **说明** | -| --- | :-------: | ------------ | ------------------------------------------------------ | -| 1 | vgroup_id | INT | vgroup id | -| 2 | db_name | BINARY(32) | 数据库名 | -| 3 | tables | INT | 此 vgroup 内有多少表。需要注意,`tables` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | -| 4 | status | BINARY(10) | 此 vgroup 的状态 | -| 5 | v1_dnode | INT | 第一个成员所在的 dnode 的 id | -| 6 | v1_status | BINARY(10) | 第一个成员的状态 | -| 7 | v2_dnode | INT | 第二个成员所在的 dnode 的 id | -| 8 | v2_status | BINARY(10) | 第二个成员的状态 | -| 9 | v3_dnode | INT | 第三个成员所在的 dnode 的 id | -| 10 | v3_status | BINARY(10) | 第三个成员的状态 | -| 11 | nfiles | INT | 此 vgroup 中数据/元数据文件的数量 | -| 12 | file_size | INT | 此 
vgroup 中数据/元数据文件的大小 | -| 13 | tsma | TINYINT | 此 vgroup 是否专用于 Time-range-wise SMA,1: 是, 0: 否 | +| # | **列名** | **数据类型** | **说明** | +| --- | :-------: | ------------ | ------------------------------------------------------------------------------------------------ | +| 1 | vgroup_id | INT | vgroup id | +| 2 | db_name | BINARY(32) | 数据库名 | +| 3 | tables | INT | 此 vgroup 内有多少表。需要注意,`tables` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| 4 | status | BINARY(10) | 此 vgroup 的状态 | +| 5 | v1_dnode | INT | 第一个成员所在的 dnode 的 id | +| 6 | v1_status | BINARY(10) | 第一个成员的状态 | +| 7 | v2_dnode | INT | 第二个成员所在的 dnode 的 id | +| 8 | v2_status | BINARY(10) | 第二个成员的状态 | +| 9 | v3_dnode | INT | 第三个成员所在的 dnode 的 id | +| 10 | v3_status | BINARY(10) | 第三个成员的状态 | +| 11 | nfiles | INT | 此 vgroup 中数据/元数据文件的数量 | +| 12 | file_size | INT | 此 vgroup 中数据/元数据文件的大小 | +| 13 | tsma | TINYINT | 此 vgroup 是否专用于 Time-range-wise SMA,1: 是, 0: 否 | ## INS_CONFIGS 系统配置参数。 -| # | **列名** | **数据类型** | **说明** | -| --- | :------: | ------------ | ------------ | -| 1 | name | BINARY(32) | 配置项名称 | +| # | **列名** | **数据类型** | **说明** | +| --- | :------: | ------------ | --------------------------------------------------------------------------------------- | +| 1 | name | BINARY(32) | 配置项名称 | | 2 | value | BINARY(64) | 该配置项的值。需要注意,`value` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | ## INS_DNODE_VARIABLES 系统中每个 dnode 的配置参数。 -| # | **列名** | **数据类型** | **说明** | -| --- | :------: | ------------ | ------------ | -| 1 | dnode_id | INT | dnode 的 ID | -| 2 | name | BINARY(32) | 配置项名称 | +| # | **列名** | **数据类型** | **说明** | +| --- | :------: | ------------ | --------------------------------------------------------------------------------------- | +| 1 | dnode_id | INT | dnode 的 ID | +| 2 | name | BINARY(32) | 配置项名称 | | 3 | value | BINARY(64) | 该配置项的值。需要注意,`value` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | ## INS_TOPICS @@ -282,19 +282,29 @@ TDengine 内置了一个名为 `INFORMATION_SCHEMA` 的数据库,提供对数 | 2 | consumer_group | BINARY(193) | 订阅者的消费者组 | | 3 | vgroup_id 
| INT | 消费者被分配的 vgroup id | | 4 | consumer_id | BIGINT | 消费者的唯一 id | -| 5 | offset | BINARY(64) | 消费者的消费进度 | -| 6 | rows | BIGINT | 消费者的消费的数据条数 | +| 5 | offset | BINARY(64) | 消费者的消费进度 | +| 6 | rows | BIGINT | 消费者的消费的数据条数 | ## INS_STREAMS -| # | **列名** | **数据类型** | **说明** | -| --- | :----------: | ------------ | --------------------------------------- | -| 1 | stream_name | BINARY(64) | 流计算名称 | -| 2 | create_time | TIMESTAMP | 创建时间 | -| 3 | sql | BINARY(1024) | 创建流计算时提供的 SQL 语句 | -| 4 | status | BINARY(20) | 流当前状态 | -| 5 | source_db | BINARY(64) | 源数据库 | -| 6 | target_db | BINARY(64) | 目的数据库 | -| 7 | target_table | BINARY(192) | 流计算写入的目标表 | -| 8 | watermark | BIGINT | watermark,详见 SQL 手册流式计算。需要注意,`watermark` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | +| # | **列名** | **数据类型** | **说明** | +| --- | :----------: | ------------ | -------------------------------------------------------------------------------------------------------------------- | +| 1 | stream_name | BINARY(64) | 流计算名称 | +| 2 | create_time | TIMESTAMP | 创建时间 | +| 3 | sql | BINARY(1024) | 创建流计算时提供的 SQL 语句 | +| 4 | status | BINARY(20) | 流当前状态 | +| 5 | source_db | BINARY(64) | 源数据库 | +| 6 | target_db | BINARY(64) | 目的数据库 | +| 7 | target_table | BINARY(192) | 流计算写入的目标表 | +| 8 | watermark | BIGINT | watermark,详见 SQL 手册流式计算。需要注意,`watermark` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | | 9 | trigger | INT | 计算结果推送模式,详见 SQL 手册流式计算。需要注意,`trigger` 为 TDengine 关键字,作为列名使用时需要使用 ` 进行转义。 | + +## INS_USER_PRIVILEGES + +| # | **列名** | **数据类型** | **说明** | +| --- | :----------: | ------------ | -------------------------------------------------------------------------------------------------------------------- | +| 1 | user_name | VARCHAR(24) | 用户名 +| 2 | privilege | VARCHAR(10) | 权限描述 +| 3 | db_name | VARCHAR(65) | 数据库名称 +| 4 | table_name | VARCHAR(193) | 表名称 +| 5 | condition | VARCHAR(49152) | 子表权限过滤条件 diff --git a/docs/zh/12-taos-sql/27-index.md b/docs/zh/12-taos-sql/27-index.md index 
7c301a202de844eaf9647ba8cf58b21538ecab47..da8f38eb225933b89f5b403e374dea3a1604c364 100644 --- a/docs/zh/12-taos-sql/27-index.md +++ b/docs/zh/12-taos-sql/27-index.md @@ -4,12 +4,13 @@ title: 索引 description: 索引功能的使用细节 --- -TDengine 从 3.0.0.0 版本开始引入了索引功能,支持 SMA 索引和 FULLTEXT 索引。 +TDengine 从 3.0.0.0 版本开始引入了索引功能,支持 SMA 索引和 tag 索引。 ## 创建索引 ```sql -CREATE FULLTEXT INDEX index_name ON tb_name (col_name [, col_name] ...) + +CREATE INDEX index_name ON tb_name index_option CREATE SMA INDEX index_name ON tb_name index_option @@ -46,10 +47,6 @@ SELECT _wstart,_wend,_wduration,max(c2),min(c1) FROM st1 INTERVAL(5m,10s) SLIDIN ALTER LOCAL 'querySmaOptimize' '0'; ``` -### FULLTEXT 索引 - -对指定列建立文本索引,可以提升含有文本过滤的查询的性能。FULLTEXT 索引不支持 index_option 语法。现阶段只支持对 JSON 类型的标签列创建 FULLTEXT 索引。不支持多列联合索引,但可以为每个列分布创建 FULLTEXT 索引。 - ## 删除索引 ```sql diff --git a/docs/zh/12-taos-sql/29-changes.md b/docs/zh/12-taos-sql/29-changes.md index 4177fa547ed5a92876e54130e527d8218065f9eb..2a1e5f092cec8ea4fd0f8c77b44843d04071e259 100644 --- a/docs/zh/12-taos-sql/29-changes.md +++ b/docs/zh/12-taos-sql/29-changes.md @@ -18,6 +18,7 @@ description: "TDengine 3.0 版本的语法变更说明" | 8 | 混合运算 | 增强 | 查询中的混合运算(标量运算和矢量运算混合)全面增强,SELECT的各个子句均全面支持符合语法语义的混合运算。 | 9 | 标签运算 | 新增 |在查询中,标签列可以像普通列一样参与各种运算,用于各种子句。 | 10 | 时间线子句和时间函数用于超级表查询 | 增强 |没有PARTITION BY时,超级表的数据会被合并成一条时间线。 +| 11 | GEOMETRY | 新增 | 几何类型。 ## SQL 语句变更 diff --git a/docs/zh/14-reference/05-taosbenchmark.md b/docs/zh/14-reference/05-taosbenchmark.md index 319046ba8f6981ec75feb9095ebfa72b03ed10f0..e4c3efba17788171c9454173a6866c91c1f71d2d 100644 --- a/docs/zh/14-reference/05-taosbenchmark.md +++ b/docs/zh/14-reference/05-taosbenchmark.md @@ -362,6 +362,8 @@ taosBenchmark -A INT,DOUBLE,NCHAR,BINARY\(16\) - **max** : 数据类型的 列/标签 的最大值。生成的值将小于最小值。 +- **fun** : 此列数据以函数填充,目前只支持 sin 和 cos 两函数,输入参数为时间戳换算成角度值,换算公式: 角度 x = 输入的时间列ts值 % 360。同时支持系数调节,随机波动因子调节,以固定格式的表达式展现,如 fun=“10\*sin(x)+100\*random(5)” , x 表示角度,取值 0 ~ 360度,增长步长与时间列步长一致。10 表示乘的系数,100 表示加或减的系数,5 表示波动幅度在 5% 的随机范围内。目前支持的数据类型为 
int, bigint, float, double 四种数据类型。注意:表达式为固定模式,不可前后颠倒。 + - **values** : nchar/binary 列/标签的值域,将从值中随机选择。 - **sma**: 将该列加入 SMA 中,值为 "yes" 或者 "no",默认为 "no"。 diff --git a/docs/zh/14-reference/09-support-platform/index.md b/docs/zh/14-reference/09-support-platform/index.md index 500eeeb14c9c1f587435a0223b15ffc6ca840550..c54cbe12e69502192f1cc0934b559651f9eb1b65 100644 --- a/docs/zh/14-reference/09-support-platform/index.md +++ b/docs/zh/14-reference/09-support-platform/index.md @@ -5,7 +5,7 @@ description: "TDengine 服务端、客户端和连接器支持的平台列表" ## TDengine 服务端支持的平台列表 -| | **Windows server 2016/2019** | **Windows 10/11** | **CentOS 7.9/8** | **Ubuntu 18/20** | **统信 UOS** | **银河/中标麒麟** | **凝思 V60/V80** | **macOS** | +| | **Windows server 2016/2019** | **Windows 10/11** | **CentOS 7.9/8** | **Ubuntu 18 以上** | **统信 UOS** | **银河/中标麒麟** | **凝思 V60/V80** | **macOS** | | ------------ | ---------------------------- | ----------------- | ---------------- | ---------------- | ------------ | ----------------- | ---------------- | --------- | | X64 | ● | ● | ● | ● | ● | ● | ● | ● | | 树莓派 ARM64 | | | ● | | | | | | diff --git a/docs/zh/14-reference/12-config/index.md b/docs/zh/14-reference/12-config/index.md index 0e269e59dc9ae3b512be3b987ad6e4fb0dbe114e..2f5f0fc3e81f57e0f1438dab897428be9c83f17d 100755 --- a/docs/zh/14-reference/12-config/index.md +++ b/docs/zh/14-reference/12-config/index.md @@ -184,7 +184,7 @@ taos -C | 属性 | 说明 | | -------- | ------------------------ | -| 适用范围 | 仅服务端适用 | +| 适用范围 | 客户端和服务端都适用 | | 含义 | 是否上传 telemetry | | 取值范围 | 0,1 0: 不上传;1:上传 | | 缺省值 | 1 | @@ -193,7 +193,7 @@ taos -C | 属性 | 说明 | | -------- | ------------------------ | -| 适用范围 | 仅服务端适用 | +| 适用范围 | 客户端和服务端都适用 | | 含义 | 是否上传 crash 信息 | | 取值范围 | 0,1 0: 不上传;1:上传 | | 缺省值 | 1 | @@ -685,7 +685,16 @@ charset 的有效值是 UTF-8。 | 适用范围 | 仅客户端适用 | | 含义 | schemaless 列数据是否顺序一致,从3.0.3.0开始,该配置废弃 | | 值域 | 0:不一致;1: 一致 | -| 缺省值 | 0 | +| 缺省值 | 0 + +### smlTsDefaultName + +| 属性 | 说明 | +| -------- | 
-------------------------------------------------------- | +| 适用范围 | 仅客户端适用 | +| 含义 | schemaless自动建表的时间列名字通过该配置设置 | +| 类型 | 字符串 | +| 缺省值 | _ts | ## 其他 @@ -736,6 +745,15 @@ charset 的有效值是 UTF-8。 | 取值范围 | 0-23 | | 缺省值 | 0 | +### tmqMaxTopicNum + +| 属性 | 说明 | +| -------- | ------------------ | +| 适用范围 | 仅服务端适用 | +| 含义 | 订阅最多可建立的 topic 数量 | +| 取值范围 | 1-10000| +| 缺省值 | 20 | + ## 压缩参数 ### compressMsgSize diff --git a/docs/zh/14-reference/13-schemaless/13-schemaless.md b/docs/zh/14-reference/13-schemaless/13-schemaless.md index 6c2007938bfc23a609c752b9c02bab4869b19aee..9f5bae081cd274fbf8e91759539227b455a75deb 100644 --- a/docs/zh/14-reference/13-schemaless/13-schemaless.md +++ b/docs/zh/14-reference/13-schemaless/13-schemaless.md @@ -35,12 +35,32 @@ tag_set 中的所有的数据自动转化为 nchar 数据类型,并不需要 - 如果两边有英文双引号,表示 BINARY(32) 类型。例如 `"abc"`。 - 如果两边有英文双引号而且带有 L 前缀,表示 NCHAR(32) 类型。例如 `L"报错信息"`。 -- 对空格、等号(=)、逗号(,)、双引号("),前面需要使用反斜杠(\)进行转义。(都指的是英文半角符号) +- 对空格、等号(=)、逗号(,)、双引号(")、反斜杠(\),前面需要使用反斜杠(\)进行转义。(都指的是英文半角符号)。具体转义规则如下: + +| **序号** | **域** | **需转义字符** | +| -------- | ----------- | ----------------------------- | +| 1 | 超级表名 | 逗号,空格 | +| 2 | 标签名 | 逗号,等号,空格 | +| 3 | 标签值 | 逗号,等号,空格 | +| 4 | 列名 | 逗号,等号,空格 | +| 5 | 列值 | 双引号,反斜杠 | + +两个连续的反斜杠,第一个作为转义符,只有一个反斜杠则无需转义. 反斜杠转义规则举例如下: + +| **序号** | **反斜杠** | **转义为** | +| -------- | ----------- | ----------------------------- | +| 1 | \ | \ | +| 2 | \\\\ | \ | +| 3 | \\\\\\ | \\\\ | +| 4 | \\\\\\\\ | \\\\ | +| 5 | \\\\\\\\\\ | \\\\\\ | +| 6 | \\\\\\\\\\\\ | \\\\\\ | + - 数值类型将通过后缀来区分数据类型: -| **序号** | **后缀** | **映射类型** | **大小(字节)** | +| **序号** | **后缀** | **映射类型** | **大小(字节)** | | -------- | ----------- | ----------------------------- | -------------- | -| 1 | 无或 f64 | double | 8 | +| 1 | 无或 f64 | double | 8 | | 2 | f32 | float | 4 | | 3 | i8/u8 | TinyInt/UTinyInt | 1 | | 4 | i16/u16 | SmallInt/USmallInt | 2 | @@ -84,7 +104,9 @@ st,t1=3,t2=4,t3=t3 c1=3i64,c3="passit",c2=false,c4=4f64 1626006833639000000 6. 
对 BINARY 或 NCHAR 列,如果数据行中所提供值的长度超出了列类型的限制,自动增加该列允许存储的字符长度上限(只增不减),以保证数据的完整保存。 7. 整个处理过程中遇到的错误会中断写入过程,并返回错误代码。 8. 为了提高写入的效率,默认假设同一个超级表中 field_set 的顺序是一样的(第一条数据包含所有的 field,后面的数据按照这个顺序),如果顺序不一样,需要配置参数 smlDataFormat 为 false,否则,数据写入按照相同顺序写入,库中数据会异常,从3.0.3.0开始,自动检测顺序是否一致,该配置废弃。 - +9. 由于sql建表表名不支持点号(.),所以schemaless也对点号(.)做了处理,如果schemaless自动建表的表名中有点号(.),会自动替换为下划线(\_)。如果手动指定子表名的话,子表名里有点号(.),同样转化为下划线(\_)。 +10. taos.cfg 增加 smlTsDefaultName 配置(值为字符串),只在client端起作用,配置后,schemaless自动建表的时间列名字可以通过该配置设置。不配置的话,默认为 _ts。 + :::tip 无模式所有的处理逻辑,仍会遵循 TDengine 对数据结构的底层限制,例如每行数据的总长度不能超过 48KB(从 3.0.5.0 版本开始为 64KB),标签值的总长度不超过16KB。这方面的具体限制约束请参见 [TDengine SQL 边界限制](/taos-sql/limit) diff --git a/docs/zh/17-operation/10-monitor.md b/docs/zh/17-operation/10-monitor.md index 50da50580834a363d6fb35e94736dc395c60d982..4f8dccc78d80b8f4dd107f481317d8f82c1be207 100644 --- a/docs/zh/17-operation/10-monitor.md +++ b/docs/zh/17-operation/10-monitor.md @@ -210,19 +210,6 @@ TDinsight dashboard 数据来源于 log 库(存放监控数据的默认db, |dnode\_ep|NCHAR|TAG|dnode endpoint| |cluster\_id|NCHAR|TAG|cluster id| -### logs 表 - -`logs` 表记录登录信息。 - -|field|type|is\_tag|comment| -|:----|:---|:-----|:------| -|ts|TIMESTAMP||timestamp| -|level|VARCHAR||log level| -|content|NCHAR||log content,长度不超过1024字节| -|dnode\_id|INT|TAG|dnode id| -|dnode\_ep|NCHAR|TAG|dnode endpoint| -|cluster\_id|NCHAR|TAG|cluster id| - ### log\_summary 表 `log_summary` 记录日志统计信息。 diff --git a/docs/zh/21-tdinternal/01-arch.md b/docs/zh/21-tdinternal/01-arch.md index 32d940abc194a77e77524ace594ff79a49bffc7d..e2480b66825e77b4c5be289f63010f8674cdc390 100644 --- a/docs/zh/21-tdinternal/01-arch.md +++ b/docs/zh/21-tdinternal/01-arch.md @@ -112,7 +112,7 @@ TDengine 3.0 采用 hash 一致性算法,确定每张数据表所在的 vnode ### 数据分区 -TDengine 除 vnode 分片之外,还对时序数据按照时间段进行分区。每个数据文件只包含一个时间段的时序数据,时间段的长度由 DB 的配置参数 days 决定。这种按时间段分区的方法还便于高效实现数据的保留策略,只要数据文件超过规定的天数(系统配置参数 keep),将被自动删除。而且不同的时间段可以存放于不同的路径和存储介质,以便于大数据的冷热管理,实现多级存储。 +TDengine 除 vnode 分片之外,还对时序数据按照时间段进行分区。每个数据文件只包含一个时间段的时序数据,时间段的长度由 DB 的配置参数 duration 
决定。这种按时间段分区的方法还便于高效实现数据的保留策略,只要数据文件超过规定的天数(系统配置参数 keep),将被自动删除。而且不同的时间段可以存放于不同的路径和存储介质,以便于大数据的冷热管理,实现多级存储。 总的来说,**TDengine 是通过 vnode 以及时间两个维度,对大数据进行切分**,便于并行高效的管理,实现水平扩展。 diff --git a/docs/zh/25-application/03-immigrate.md b/docs/zh/25-application/03-immigrate.md index 75788c0cc7d6e0e84402ba77c4a1aa875d772d8b..389a2b2c5a31f7357fafe9bf4fd178d811acf464 100644 --- a/docs/zh/25-application/03-immigrate.md +++ b/docs/zh/25-application/03-immigrate.md @@ -371,7 +371,7 @@ Select min(val) from table_name 等效函数:sum ```sql -Select max(value) from (select first(val) value from table_name interval(10s) fill(linear)) interval(10s) +Select sum(value) from (select first(val) value from table_name interval(10s) fill(linear)) interval(10s) ``` 备注:该函数无插值需求,因此可用直接计算。 diff --git a/docs/zh/28-releases/01-tdengine.md b/docs/zh/28-releases/01-tdengine.md index 52bb9c87a0f31aaa6ad7c843149203207b744ae1..afdf2a76d3106b0ba0dd4aaf77c96dcf387ea67e 100644 --- a/docs/zh/28-releases/01-tdengine.md +++ b/docs/zh/28-releases/01-tdengine.md @@ -10,6 +10,10 @@ TDengine 2.x 各版本安装包请访问[这里](https://www.taosdata.com/all-do import Release from "/components/ReleaseV3"; +## 3.1.0.0 + + + ## 3.0.7.1 diff --git a/examples/JDBC/mybatisplus-demo/pom.xml b/examples/JDBC/mybatisplus-demo/pom.xml index 5555145958de67fdf03eb744426afcfc13b6fcb3..f792946c9653ebffa9c78f7380764a71ab76e364 100644 --- a/examples/JDBC/mybatisplus-demo/pom.xml +++ b/examples/JDBC/mybatisplus-demo/pom.xml @@ -47,7 +47,7 @@ com.taosdata.jdbc taos-jdbcdriver - 3.0.0 + 3.2.4 diff --git a/include/client/taos.h b/include/client/taos.h index 7bdf16ed3854256f27328d1ff2f6ad47b4e11bad..3cc2d907ab5ca18d16a9553d336672d67e4f974c 100644 --- a/include/client/taos.h +++ b/include/client/taos.h @@ -287,11 +287,20 @@ DLL_EXPORT TAOS_RES *tmq_consumer_poll(tmq_t *tmq, int64_t timeout); DLL_EXPORT int32_t tmq_consumer_close(tmq_t *tmq); DLL_EXPORT int32_t tmq_commit_sync(tmq_t *tmq, const TAOS_RES *msg); DLL_EXPORT void tmq_commit_async(tmq_t *tmq, const 
TAOS_RES *msg, tmq_commit_cb *cb, void *param); +DLL_EXPORT int32_t tmq_commit_offset_sync(tmq_t *tmq, const char *pTopicName, int32_t vgId, int64_t offset); +DLL_EXPORT void tmq_commit_offset_async(tmq_t *tmq, const char *pTopicName, int32_t vgId, int64_t offset, tmq_commit_cb *cb, void *param); DLL_EXPORT int32_t tmq_get_topic_assignment(tmq_t *tmq, const char *pTopicName, tmq_topic_assignment **assignment, int32_t *numOfAssignment); DLL_EXPORT void tmq_free_assignment(tmq_topic_assignment* pAssignment); DLL_EXPORT int32_t tmq_offset_seek(tmq_t *tmq, const char *pTopicName, int32_t vgId, int64_t offset); +DLL_EXPORT const char *tmq_get_topic_name(TAOS_RES *res); +DLL_EXPORT const char *tmq_get_db_name(TAOS_RES *res); +DLL_EXPORT int32_t tmq_get_vgroup_id(TAOS_RES *res); +DLL_EXPORT int64_t tmq_get_vgroup_offset(TAOS_RES* res); +DLL_EXPORT int64_t tmq_position(tmq_t *tmq, const char *pTopicName, int32_t vgId); +DLL_EXPORT int64_t tmq_committed(tmq_t *tmq, const char *pTopicName, int32_t vgId); + /* ----------------------TMQ CONFIGURATION INTERFACE---------------------- */ enum tmq_conf_res_t { @@ -309,11 +318,6 @@ DLL_EXPORT void tmq_conf_set_auto_commit_cb(tmq_conf_t *conf, tmq_comm /* -------------------------TMQ MSG HANDLE INTERFACE---------------------- */ -DLL_EXPORT const char *tmq_get_topic_name(TAOS_RES *res); -DLL_EXPORT const char *tmq_get_db_name(TAOS_RES *res); -DLL_EXPORT int32_t tmq_get_vgroup_id(TAOS_RES *res); -DLL_EXPORT int64_t tmq_get_vgroup_offset(TAOS_RES* res); - /* ------------------------------ TAOSX -----------------------------------*/ // note: following apis are unstable enum tmq_res_t { diff --git a/include/common/tglobal.h b/include/common/tglobal.h index d6c552b3f6dc1f3413790aebc9c23ee8b05efc08..8ea0a857e882eb1b23a1dc73da3216bfa144133a 100644 --- a/include/common/tglobal.h +++ b/include/common/tglobal.h @@ -85,8 +85,14 @@ extern int64_t tsVndCommitMaxIntervalMs; extern int64_t tsMndSdbWriteDelta; extern int64_t tsMndLogRetention; 
extern int8_t tsGrant; +extern int32_t tsMndGrantMode; extern bool tsMndSkipGrant; +// dnode +extern int64_t tsDndStart; +extern int64_t tsDndStartOsUptime; +extern int64_t tsDndUpTime; + // monitor extern bool tsEnableMonitor; extern int32_t tsMonitorInterval; @@ -163,6 +169,8 @@ extern char tsUdfdLdLibPath[]; // schemaless extern char tsSmlChildTableName[]; extern char tsSmlTagName[]; +extern bool tsSmlDot2Underline; +extern char tsSmlTsDefaultName[]; // extern bool tsSmlDataFormat; // extern int32_t tsSmlBatchSize; diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 5dc5b7f51442b5579bea3a42ea065432e82860d5..907ff2c60610d7e364a0f62f62cede9382f18c18 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -1145,6 +1145,7 @@ typedef struct { char timezone[TD_TIMEZONE_LEN]; // tsTimezone char locale[TD_LOCALE_LEN]; // tsLocale char charset[TD_LOCALE_LEN]; // tsCharset + int8_t ttlChangeOnWrite; } SClusterCfg; typedef struct { @@ -1182,6 +1183,8 @@ typedef struct { typedef struct { int8_t syncState; int8_t syncRestore; + int64_t syncTerm; + int64_t roleTimeMs; } SMnodeLoad; typedef struct { @@ -1500,6 +1503,7 @@ int32_t tDeserializeSShowVariablesReq(void* buf, int32_t bufLen, SShowVariablesR typedef struct { char name[TSDB_CONFIG_OPTION_LEN + 1]; char value[TSDB_CONFIG_VALUE_LEN + 1]; + char scope[TSDB_CONFIG_SCOPE_LEN + 1]; } SVariablesInfo; typedef struct { @@ -3383,6 +3387,12 @@ typedef struct { int8_t reserved; } SMqHbRsp; +typedef struct { + SMsgHead head; + int64_t consumerId; + char subKey[TSDB_SUBSCRIBE_KEY_LEN]; +} SMqSeekReq; + #define TD_AUTO_CREATE_TABLE 0x1 typedef struct { int64_t suid; @@ -3512,6 +3522,8 @@ int32_t tSerializeSMqHbReq(void* buf, int32_t bufLen, SMqHbReq* pReq); int32_t tDeserializeSMqHbReq(void* buf, int32_t bufLen, SMqHbReq* pReq); int32_t tDeatroySMqHbReq(SMqHbReq* pReq); +int32_t tSerializeSMqSeekReq(void *buf, int32_t bufLen, SMqSeekReq *pReq); +int32_t tDeserializeSMqSeekReq(void *buf, int32_t bufLen, 
SMqSeekReq *pReq); #define SUBMIT_REQ_AUTO_CREATE_TABLE 0x1 #define SUBMIT_REQ_COLUMN_DATA_FORMAT 0x2 diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index 9015d2f8e3b581b5639bd06e2c7b83df0ea5865b..b90db3d7e08c9b2ec985a707fe4b76f2ccd4866d 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -308,12 +308,13 @@ enum { TD_DEF_MSG_TYPE(TDMT_VND_TMQ_SUBSCRIBE, "vnode-tmq-subscribe", SMqRebVgReq, SMqRebVgRsp) TD_DEF_MSG_TYPE(TDMT_VND_TMQ_DELETE_SUB, "vnode-tmq-delete-sub", SMqVDeleteReq, SMqVDeleteRsp) TD_DEF_MSG_TYPE(TDMT_VND_TMQ_COMMIT_OFFSET, "vnode-tmq-commit-offset", STqOffset, STqOffset) - TD_DEF_MSG_TYPE(TDMT_VND_TMQ_SEEK_TO_OFFSET, "vnode-tmq-seekto-offset", STqOffset, STqOffset) + TD_DEF_MSG_TYPE(TDMT_VND_TMQ_SEEK, "vnode-tmq-seek", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_TMQ_ADD_CHECKINFO, "vnode-tmq-add-checkinfo", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_TMQ_DEL_CHECKINFO, "vnode-del-checkinfo", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_TMQ_CONSUME, "vnode-tmq-consume", SMqPollReq, SMqDataBlkRsp) TD_DEF_MSG_TYPE(TDMT_VND_TMQ_CONSUME_PUSH, "vnode-tmq-consume-push", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_TMQ_VG_WALINFO, "vnode-tmq-vg-walinfo", SMqPollReq, SMqDataBlkRsp) + TD_DEF_MSG_TYPE(TDMT_VND_TMQ_VG_COMMITTEDINFO, "vnode-tmq-committedinfo", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_VND_TMQ_MAX_MSG, "vnd-tmq-max", NULL, NULL) diff --git a/include/libs/executor/executor.h b/include/libs/executor/executor.h index 3bef15f3a7c49b7a89112344b67182b3da9f3696..f90c38f341edccf801d7f7d470228c524a8f794d 100644 --- a/include/libs/executor/executor.h +++ b/include/libs/executor/executor.h @@ -221,13 +221,9 @@ int32_t qStreamSourceScanParamForHistoryScanStep2(qTaskInfo_t tinfo, SVersionRan int32_t qStreamRecoverFinish(qTaskInfo_t tinfo); int32_t qRestoreStreamOperatorOption(qTaskInfo_t tinfo); bool qStreamRecoverScanFinished(qTaskInfo_t tinfo); -bool qStreamRecoverScanStep1Finished(qTaskInfo_t tinfo); -bool qStreamRecoverScanStep2Finished(qTaskInfo_t tinfo); 
-int32_t qStreamRecoverSetAllStepFinished(qTaskInfo_t tinfo); +int32_t qStreamInfoResetTimewindowFilter(qTaskInfo_t tinfo); void resetTaskInfo(qTaskInfo_t tinfo); -void qResetStreamInfoTimeWindow(qTaskInfo_t tinfo); - int32_t qStreamOperatorReleaseState(qTaskInfo_t tInfo); int32_t qStreamOperatorReloadState(qTaskInfo_t tInfo); diff --git a/include/libs/executor/storageapi.h b/include/libs/executor/storageapi.h index e263c9d236645ed9d3288cfea091152a0e6b19b9..773f373a2d174121a7396bed1f5827e7bb514987 100644 --- a/include/libs/executor/storageapi.h +++ b/include/libs/executor/storageapi.h @@ -228,7 +228,7 @@ typedef struct SStoreTqReader { } SStoreTqReader; typedef struct SStoreSnapshotFn { - int32_t (*createSnapshot)(SSnapContext* ctx, int64_t uid); + int32_t (*setForSnapShot)(SSnapContext* ctx, int64_t uid); int32_t (*destroySnapshot)(SSnapContext* ctx); SMetaTableInfo (*getMetaTableInfoFromSnapshot)(SSnapContext* ctx); int32_t (*getTableInfoFromSnapshot)(SSnapContext* ctx, void** pBuf, int32_t* contLen, int16_t* type, int64_t* uid); @@ -368,6 +368,8 @@ typedef struct SStateStore { bool (*updateInfoIsUpdated)(SUpdateInfo* pInfo, uint64_t tableId, TSKEY ts); bool (*updateInfoIsTableInserted)(SUpdateInfo* pInfo, int64_t tbUid); void (*updateInfoDestroy)(SUpdateInfo* pInfo); + void (*windowSBfDelete)(SUpdateInfo *pInfo, uint64_t count); + void (*windowSBfAdd)(SUpdateInfo *pInfo, uint64_t count); SUpdateInfo* (*updateInfoInitP)(SInterval* pInterval, int64_t watermark); void (*updateInfoAddCloseWindowSBF)(SUpdateInfo* pInfo); diff --git a/include/libs/nodes/cmdnodes.h b/include/libs/nodes/cmdnodes.h index bd0b70c3105bc4fe818cf8ffb166285641846a75..f0a715e6511af931b7e580dae9a83cf7d447e371 100644 --- a/include/libs/nodes/cmdnodes.h +++ b/include/libs/nodes/cmdnodes.h @@ -36,9 +36,10 @@ extern "C" { #define SHOW_CREATE_TB_RESULT_FIELD1_LEN (TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE) #define SHOW_CREATE_TB_RESULT_FIELD2_LEN (TSDB_MAX_ALLOWED_SQL_LEN * 3) -#define 
SHOW_LOCAL_VARIABLES_RESULT_COLS 2 +#define SHOW_LOCAL_VARIABLES_RESULT_COLS 3 #define SHOW_LOCAL_VARIABLES_RESULT_FIELD1_LEN (TSDB_CONFIG_OPTION_LEN + VARSTR_HEADER_SIZE) #define SHOW_LOCAL_VARIABLES_RESULT_FIELD2_LEN (TSDB_CONFIG_VALUE_LEN + VARSTR_HEADER_SIZE) +#define SHOW_LOCAL_VARIABLES_RESULT_FIELD3_LEN (TSDB_CONFIG_SCOPE_LEN + VARSTR_HEADER_SIZE) #define SHOW_ALIVE_RESULT_COLS 1 @@ -361,7 +362,7 @@ typedef struct SRestoreComponentNodeStmt { typedef struct SCreateTopicStmt { ENodeType type; - char topicName[TSDB_TABLE_NAME_LEN]; + char topicName[TSDB_TOPIC_NAME_LEN]; char subDbName[TSDB_DB_NAME_LEN]; char subSTbName[TSDB_TABLE_NAME_LEN]; bool ignoreExists; @@ -372,13 +373,13 @@ typedef struct SCreateTopicStmt { typedef struct SDropTopicStmt { ENodeType type; - char topicName[TSDB_TABLE_NAME_LEN]; + char topicName[TSDB_TOPIC_NAME_LEN]; bool ignoreNotExists; } SDropTopicStmt; typedef struct SDropCGroupStmt { ENodeType type; - char topicName[TSDB_TABLE_NAME_LEN]; + char topicName[TSDB_TOPIC_NAME_LEN]; char cgroup[TSDB_CGROUP_LEN]; bool ignoreNotExists; } SDropCGroupStmt; diff --git a/include/libs/nodes/plannodes.h b/include/libs/nodes/plannodes.h index 453c5d49142934b79758f3faf5e3c12c80c16a55..063318332a7cdc0a66c983fdfa464ca8f1bf9a8e 100644 --- a/include/libs/nodes/plannodes.h +++ b/include/libs/nodes/plannodes.h @@ -55,6 +55,7 @@ typedef struct SLogicNode { EGroupAction groupAction; EOrder inputTsOrder; EOrder outputTsOrder; + bool forceCreateNonBlockingOptr; // true if the operator can use non-blocking(pipeline) mode } SLogicNode; typedef enum EScanType { @@ -105,6 +106,7 @@ typedef struct SScanLogicNode { bool hasNormalCols; // neither tag column nor primary key tag column bool sortPrimaryKey; bool igLastNull; + bool groupOrderScan; } SScanLogicNode; typedef struct SJoinLogicNode { @@ -246,6 +248,8 @@ typedef struct SSortLogicNode { SLogicNode node; SNodeList* pSortKeys; bool groupSort; + int64_t maxRows; + bool skipPKSortOpt; } SSortLogicNode; typedef 
struct SPartitionLogicNode { @@ -316,6 +320,7 @@ typedef struct SPhysiNode { struct SPhysiNode* pParent; SNode* pLimit; SNode* pSlimit; + bool forceCreateNonBlockingOptr; } SPhysiNode; typedef struct SScanPhysiNode { @@ -326,6 +331,7 @@ typedef struct SScanPhysiNode { uint64_t suid; int8_t tableType; SName tableName; + bool groupOrderScan; } SScanPhysiNode; typedef SScanPhysiNode STagScanPhysiNode; diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 34a0bc86576d2d9d27d26c15f8da9730d1e4bd35..b4ae30910c14b2c5965e3d19776f6fac09cdd3a1 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -45,8 +45,8 @@ enum { TASK_STATUS__FAIL, TASK_STATUS__STOP, TASK_STATUS__SCAN_HISTORY, // stream task scan history data by using tsdbread in the stream scanner - TASK_STATUS__HALT, // stream task will handle all data in the input queue, and then paused - TASK_STATUS__PAUSE, + TASK_STATUS__HALT, // pause, but not be manipulated by user command + TASK_STATUS__PAUSE, // pause }; enum { @@ -272,6 +272,7 @@ typedef struct SStreamStatus { int8_t keepTaskStatus; bool transferState; int8_t timerActive; // timer is active + int8_t pauseAllowed; // allowed task status to be set to be paused } SStreamStatus; typedef struct SHistDataRange { @@ -296,15 +297,21 @@ typedef struct SDispatchMsgInfo { } SDispatchMsgInfo; typedef struct { - int8_t outputType; - int8_t outputStatus; - SStreamQueue* outputQueue; -} SSTaskOutputInfo; + int8_t type; + int8_t status; + SStreamQueue* queue; +} STaskOutputInfo; + +typedef struct { + int64_t init; + int64_t step1Start; + int64_t step2Start; +} STaskTimestamp; struct SStreamTask { SStreamId id; SSTaskBasicInfo info; - int8_t outputType; + STaskOutputInfo outputInfo; SDispatchMsgInfo msgInfo; SStreamStatus status; SCheckpointInfo chkInfo; @@ -315,7 +322,7 @@ struct SStreamTask { SArray* pUpstreamEpInfoList; // SArray, // children info int32_t nextCheckId; SArray* checkpointInfo; // SArray - + STaskTimestamp 
tsInfo; // output union { STaskDispatcherFixedEp fixedEpDispatcher; @@ -326,9 +333,7 @@ struct SStreamTask { }; int8_t inputStatus; - int8_t outputStatus; SStreamQueue* inputQueue; - SStreamQueue* outputQueue; // trigger int8_t triggerStatus; @@ -337,6 +342,8 @@ struct SStreamTask { void* launchTaskTimer; SMsgCb* pMsgCb; // msg handle SStreamState* pState; // state backend + SArray* pRspMsgList; + TdThreadMutex lock; // the followings attributes don't be serialized int32_t notReadyTasks; @@ -458,7 +465,9 @@ typedef struct { typedef struct { int64_t streamId; - int32_t taskId; + int32_t upstreamTaskId; + int32_t downstreamTaskId; + int32_t upstreamNodeId; int32_t childId; } SStreamScanHistoryFinishReq, SStreamTransferReq; @@ -519,6 +528,17 @@ int32_t tDecodeSStreamCheckpointReq(SDecoder* pDecoder, SStreamCheckpointReq* pR int32_t tEncodeSStreamCheckpointRsp(SEncoder* pEncoder, const SStreamCheckpointRsp* pRsp); int32_t tDecodeSStreamCheckpointRsp(SDecoder* pDecoder, SStreamCheckpointRsp* pRsp); +typedef struct { + int64_t streamId; + int32_t upstreamTaskId; + int32_t upstreamNodeId; + int32_t downstreamId; + int32_t downstreamNode; +} SStreamCompleteHistoryMsg; + +int32_t tEncodeCompleteHistoryDataMsg(SEncoder* pEncoder, const SStreamCompleteHistoryMsg* pReq); +int32_t tDecodeCompleteHistoryDataMsg(SDecoder* pDecoder, SStreamCompleteHistoryMsg* pReq); + typedef struct { int64_t streamId; int32_t downstreamTaskId; @@ -559,7 +579,6 @@ int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, S int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, int32_t code); int32_t streamProcessRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* pReq, SRpcMsg* pMsg); -// int32_t streamProcessRetrieveRsp(SStreamTask* pTask, SStreamRetrieveRsp* pRsp); void streamTaskInputFail(SStreamTask* pTask); int32_t streamTryExec(SStreamTask* pTask); @@ -568,57 +587,64 @@ int32_t streamTaskOutputResultBlock(SStreamTask* pTask, SStreamDataBlock* 
pBlock bool streamTaskShouldStop(const SStreamStatus* pStatus); bool streamTaskShouldPause(const SStreamStatus* pStatus); bool streamTaskIsIdle(const SStreamTask* pTask); +int32_t streamTaskEndScanWAL(SStreamTask* pTask); -int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz); +SStreamChildEpInfo * streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId); +int32_t streamScanExec(SStreamTask* pTask, int32_t batchSize); char* createStreamTaskIdStr(int64_t streamId, int32_t taskId); // recover and fill history -void streamPrepareNdoCheckDownstream(SStreamTask* pTask); -int32_t streamTaskCheckDownstreamTasks(SStreamTask* pTask); +void streamTaskCheckDownstreamTasks(SStreamTask* pTask); +int32_t streamTaskDoCheckDownstreamTasks(SStreamTask* pTask); int32_t streamTaskLaunchScanHistory(SStreamTask* pTask); int32_t streamTaskCheckStatus(SStreamTask* pTask); +int32_t streamSendCheckRsp(const SStreamMeta* pMeta, const SStreamTaskCheckReq* pReq, SStreamTaskCheckRsp* pRsp, + SRpcHandleInfo* pRpcInfo, int32_t taskId); int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp); -int32_t streamCheckHistoryTaskDownstream(SStreamTask* pTask); +int32_t streamLaunchFillHistoryTask(SStreamTask* pTask); int32_t streamTaskScanHistoryDataComplete(SStreamTask* pTask); -int32_t streamStartRecoverTask(SStreamTask* pTask, int8_t igUntreated); -void streamHistoryTaskSetVerRangeStep2(SStreamTask* pTask); - -bool streamTaskRecoverScanStep1Finished(SStreamTask* pTask); -bool streamTaskRecoverScanStep2Finished(SStreamTask* pTask); -int32_t streamTaskRecoverSetAllStepFinished(SStreamTask* pTask); +int32_t streamStartScanHistoryAsync(SStreamTask* pTask, int8_t igUntreated); +bool streamHistoryTaskSetVerRangeStep2(SStreamTask* pTask, int64_t latestVer); // common -int32_t streamSetParamForScanHistory(SStreamTask* pTask); int32_t streamRestoreParam(SStreamTask* pTask); int32_t streamSetStatusNormal(SStreamTask* pTask); const char* 
streamGetTaskStatusStr(int32_t status); +void streamTaskPause(SStreamTask* pTask); +void streamTaskResume(SStreamTask* pTask); +void streamTaskHalt(SStreamTask* pTask); +void streamTaskResumeFromHalt(SStreamTask* pTask); +void streamTaskDisablePause(SStreamTask* pTask); +void streamTaskEnablePause(SStreamTask* pTask); // source level int32_t streamSetParamForStreamScannerStep1(SStreamTask* pTask, SVersionRange* pVerRange, STimeWindow* pWindow); int32_t streamSetParamForStreamScannerStep2(SStreamTask* pTask, SVersionRange* pVerRange, STimeWindow* pWindow); -int32_t streamBuildSourceRecover1Req(SStreamTask* pTask, SStreamScanHistoryReq* pReq, int8_t igUntreated); int32_t streamSourceScanHistoryData(SStreamTask* pTask); int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask); int32_t streamDispatchTransferStateMsg(SStreamTask* pTask); // agg level -int32_t streamAggScanHistoryPrepare(SStreamTask* pTask); -int32_t streamProcessScanHistoryFinishReq(SStreamTask* pTask, int32_t taskId, int32_t childId); +int32_t streamTaskScanHistoryPrepare(SStreamTask* pTask); +int32_t streamProcessScanHistoryFinishReq(SStreamTask* pTask, SStreamScanHistoryFinishReq *pReq, SRpcHandleInfo* pRpcInfo); +int32_t streamProcessScanHistoryFinishRsp(SStreamTask* pTask); // stream task meta void streamMetaInit(); void streamMetaCleanup(); SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId); void streamMetaClose(SStreamMeta* streamMeta); + +// save to b-tree meta store int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask); -int32_t streamMetaAddDeployedTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask); -int32_t streamMetaAddSerializedTask(SStreamMeta* pMeta, int64_t checkpointVer, char* msg, int32_t msgLen); -int32_t streamMetaGetNumOfTasks(const SStreamMeta* pMeta); // todo remove it +int32_t streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId); +int32_t streamMetaRegisterTask(SStreamMeta* pMeta, int64_t ver, 
SStreamTask* pTask, bool* pAdded); +int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int32_t taskId); +int32_t streamMetaGetNumOfTasks(SStreamMeta* pMeta); // todo remove it SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int32_t taskId); void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask); -void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId); int32_t streamMetaBegin(SStreamMeta* pMeta); int32_t streamMetaCommit(SStreamMeta* pMeta); diff --git a/include/libs/stream/tstreamUpdate.h b/include/libs/stream/tstreamUpdate.h index bd5a3be8de638005a5e85e999d3888702903eb75..7bb1d027c9da6539708a755b52f23b87a10beea8 100644 --- a/include/libs/stream/tstreamUpdate.h +++ b/include/libs/stream/tstreamUpdate.h @@ -53,6 +53,8 @@ void updateInfoAddCloseWindowSBF(SUpdateInfo *pInfo); void updateInfoDestoryColseWinSBF(SUpdateInfo *pInfo); int32_t updateInfoSerialize(void *buf, int32_t bufLen, const SUpdateInfo *pInfo); int32_t updateInfoDeserialize(void *buf, int32_t bufLen, SUpdateInfo *pInfo); +void windowSBfDelete(SUpdateInfo *pInfo, uint64_t count); +void windowSBfAdd(SUpdateInfo *pInfo, uint64_t count); #ifdef __cplusplus } diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index 76b504ea99577da6ffc4278076ebe8d815095fea..b4c52fbc5632d9403f8a58d84f4c5d8979a25d1b 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -241,6 +241,8 @@ typedef struct SSyncState { bool restored; bool canRead; int32_t progress; + SyncTerm term; + int64_t roleTimeMs; } SSyncState; int32_t syncInit(); @@ -254,17 +256,17 @@ int32_t syncPropose(int64_t rid, SRpcMsg* pMsg, bool isWeak, int64_t* seq); int32_t syncCheckMember(int64_t rid); int32_t syncIsCatchUp(int64_t rid); ESyncRole syncGetRole(int64_t rid); -int32_t syncProcessMsg(int64_t rid, SRpcMsg* pMsg); -int32_t syncReconfig(int64_t rid, SSyncCfg* pCfg); -int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex); -int32_t syncEndSnapshot(int64_t rid); -int32_t 
syncLeaderTransfer(int64_t rid); -int32_t syncStepDown(int64_t rid, SyncTerm newTerm); -bool syncIsReadyForRead(int64_t rid); -bool syncSnapshotSending(int64_t rid); -bool syncSnapshotRecving(int64_t rid); -int32_t syncSendTimeoutRsp(int64_t rid, int64_t seq); -int32_t syncForceBecomeFollower(SSyncNode* ths, const SRpcMsg* pRpcMsg); +int32_t syncProcessMsg(int64_t rid, SRpcMsg* pMsg); +int32_t syncReconfig(int64_t rid, SSyncCfg* pCfg); +int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex); +int32_t syncEndSnapshot(int64_t rid); +int32_t syncLeaderTransfer(int64_t rid); +int32_t syncStepDown(int64_t rid, SyncTerm newTerm); +bool syncIsReadyForRead(int64_t rid); +bool syncSnapshotSending(int64_t rid); +bool syncSnapshotRecving(int64_t rid); +int32_t syncSendTimeoutRsp(int64_t rid, int64_t seq); +int32_t syncForceBecomeFollower(SSyncNode* ths, const SRpcMsg* pRpcMsg); SSyncState syncGetState(int64_t rid); void syncGetRetryEpSet(int64_t rid, SEpSet* pEpSet); diff --git a/include/os/os.h b/include/os/os.h index 309a977ff6b7e6d500d93cba0ee487cc7befbd9e..ac1a750b78cc01109821a39f24da5d4453abf8d8 100644 --- a/include/os/os.h +++ b/include/os/os.h @@ -53,6 +53,7 @@ extern "C" { #else #include #include +#include #if defined(_TD_X86_) #include #endif diff --git a/include/os/osSysinfo.h b/include/os/osSysinfo.h index b5309178aeb2fb5e03b2e8983cda4c476061aa23..a6a3655a5516748a8cb75ac453c21948636888a9 100644 --- a/include/os/osSysinfo.h +++ b/include/os/osSysinfo.h @@ -35,6 +35,7 @@ typedef struct { bool taosCheckSystemIsLittleEnd(); void taosGetSystemInfo(); +int64_t taosGetOsUptime(); int32_t taosGetEmail(char *email, int32_t maxLen); int32_t taosGetOsReleaseName(char *releaseName, char* sName, char* ver, int32_t maxLen); int32_t taosGetCpuInfo(char *cpuModel, int32_t maxLen, float *numOfCores); diff --git a/include/util/taoserror.h b/include/util/taoserror.h index ae105aa1dcc7ecb594db1eb9f4fb797e3003037f..b43985074c7d88673f80dac9b02607a66d114b20 100644 --- 
a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -775,6 +775,12 @@ int32_t* taosGetErrno(); #define TSDB_CODE_TMQ_TOPIC_OUT_OF_RANGE TAOS_DEF_ERROR_CODE(0, 0x4004) #define TSDB_CODE_TMQ_GROUP_OUT_OF_RANGE TAOS_DEF_ERROR_CODE(0, 0x4005) #define TSDB_CODE_TMQ_SNAPSHOT_ERROR TAOS_DEF_ERROR_CODE(0, 0x4006) +#define TSDB_CODE_TMQ_VERSION_OUT_OF_RANGE TAOS_DEF_ERROR_CODE(0, 0x4007) +#define TSDB_CODE_TMQ_INVALID_VGID TAOS_DEF_ERROR_CODE(0, 0x4008) +#define TSDB_CODE_TMQ_INVALID_TOPIC TAOS_DEF_ERROR_CODE(0, 0x4009) +#define TSDB_CODE_TMQ_NEED_INITIALIZED TAOS_DEF_ERROR_CODE(0, 0x4010) +#define TSDB_CODE_TMQ_NO_COMMITTED TAOS_DEF_ERROR_CODE(0, 0x4011) +#define TSDB_CODE_TMQ_SAME_COMMITTED_VALUE TAOS_DEF_ERROR_CODE(0, 0x4012) // stream #define TSDB_CODE_STREAM_TASK_NOT_EXIST TAOS_DEF_ERROR_CODE(0, 0x4100) diff --git a/include/util/tarray.h b/include/util/tarray.h index a93c6953700fcc5f8861a2161e9f0b8bd1e97e56..f56c9e3a1761303961aa9e946e7063049a48baf9 100644 --- a/include/util/tarray.h +++ b/include/util/tarray.h @@ -22,7 +22,7 @@ extern "C" { #endif -#define TARRAY_MIN_SIZE 8 +#define TARRAY_MIN_SIZE 4 #define TARRAY_GET_ELEM(array, index) ((void*)((char*)((array)->pData) + (index) * (array)->elemSize)) #define TARRAY_ELEM_IDX(array, ele) (POINTER_DISTANCE(ele, (array)->pData) / (array)->elemSize) @@ -138,7 +138,7 @@ size_t taosArrayGetSize(const SArray* pArray); * @param index * @param pData */ -void* taosArrayInsert(SArray* pArray, size_t index, void* pData); +void* taosArrayInsert(SArray* pArray, size_t index, const void* pData); /** * set data in array @@ -204,9 +204,9 @@ void taosArrayClearEx(SArray* pArray, void (*fp)(void*)); void* taosArrayDestroy(SArray* pArray); -void taosArrayDestroyP(SArray* pArray, FDelete fp); +void taosArrayDestroyP(SArray* pArray, FDelete fp); -void taosArrayDestroyEx(SArray* pArray, FDelete fp); +void taosArrayDestroyEx(SArray* pArray, FDelete fp); void taosArraySwap(SArray* a, SArray* b); diff --git a/include/util/tarray2.h 
b/include/util/tarray2.h new file mode 100644 index 0000000000000000000000000000000000000000..cd49e647895e1571b6b70ad46408420d6972a4b7 --- /dev/null +++ b/include/util/tarray2.h @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "talgo.h" + +#ifndef _TD_UTIL_TARRAY2_H_ +#define _TD_UTIL_TARRAY2_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +// a: a +// e: element +// ep: element pointer +// cmp: compare function +// idx: index +// cb: callback function + +#define TARRAY2(TYPE) \ + struct { \ + int32_t size; \ + int32_t capacity; \ + TYPE *data; \ + } + +typedef void (*TArray2Cb)(void *); + +#define TARRAY2_SIZE(a) ((a)->size) +#define TARRAY2_CAPACITY(a) ((a)->capacity) +#define TARRAY2_DATA(a) ((a)->data) +#define TARRAY2_GET(a, i) ((a)->data[i]) +#define TARRAY2_GET_PTR(a, i) ((a)->data + i) +#define TARRAY2_FIRST(a) ((a)->data[0]) +#define TARRAY2_LAST(a) ((a)->data[(a)->size - 1]) +#define TARRAY2_DATA_LEN(a) ((a)->size * sizeof(((a)->data[0]))) + +static FORCE_INLINE int32_t tarray2_make_room(void *arr, int32_t expSize, int32_t eleSize) { + TARRAY2(void) *a = arr; + + int32_t capacity = (a->capacity > 0) ? 
(a->capacity << 1) : 32; + while (capacity < expSize) { + capacity <<= 1; + } + void *p = taosMemoryRealloc(a->data, capacity * eleSize); + if (p == NULL) return TSDB_CODE_OUT_OF_MEMORY; + a->capacity = capacity; + a->data = p; + return 0; +} + +static FORCE_INLINE int32_t tarray2InsertBatch(void *arr, int32_t idx, const void *elePtr, int32_t numEle, + int32_t eleSize) { + TARRAY2(uint8_t) *a = arr; + + int32_t ret = 0; + if (a->size + numEle > a->capacity) { + ret = tarray2_make_room(a, a->size + numEle, eleSize); + } + if (ret == 0) { + if (idx < a->size) { + memmove(a->data + (idx + numEle) * eleSize, a->data + idx * eleSize, (a->size - idx) * eleSize); + } + memcpy(a->data + idx * eleSize, elePtr, numEle * eleSize); + a->size += numEle; + } + return ret; +} + +static FORCE_INLINE void *tarray2Search(void *arr, const void *elePtr, int32_t eleSize, __compar_fn_t compar, + int32_t flag) { + TARRAY2(void) *a = arr; + return taosbsearch(elePtr, a->data, a->size, eleSize, compar, flag); +} + +static FORCE_INLINE int32_t tarray2SearchIdx(void *arr, const void *elePtr, int32_t eleSize, __compar_fn_t compar, + int32_t flag) { + TARRAY2(void) *a = arr; + void *p = taosbsearch(elePtr, a->data, a->size, eleSize, compar, flag); + if (p == NULL) { + return -1; + } else { + return (int32_t)(((uint8_t *)p - (uint8_t *)a->data) / eleSize); + } +} + +static FORCE_INLINE int32_t tarray2SortInsert(void *arr, const void *elePtr, int32_t eleSize, __compar_fn_t compar) { + TARRAY2(void) *a = arr; + int32_t idx = tarray2SearchIdx(arr, elePtr, eleSize, compar, TD_GT); + return tarray2InsertBatch(arr, idx < 0 ? 
a->size : idx, elePtr, 1, eleSize); +} + +#define TARRAY2_INIT_EX(a, size_, capacity_, data_) \ + do { \ + (a)->size = (size_); \ + (a)->capacity = (capacity_); \ + (a)->data = (data_); \ + } while (0) + +#define TARRAY2_INIT(a) TARRAY2_INIT_EX(a, 0, 0, NULL) + +#define TARRAY2_CLEAR(a, cb) \ + do { \ + if ((cb) && (a)->size > 0) { \ + TArray2Cb cb_ = (TArray2Cb)(cb); \ + for (int32_t i = 0; i < (a)->size; ++i) { \ + cb_((a)->data + i); \ + } \ + } \ + (a)->size = 0; \ + } while (0) + +#define TARRAY2_DESTROY(a, cb) \ + do { \ + TARRAY2_CLEAR(a, cb); \ + if ((a)->data) { \ + taosMemoryFree((a)->data); \ + (a)->data = NULL; \ + } \ + (a)->capacity = 0; \ + } while (0) + +#define TARRAY2_INSERT_PTR(a, idx, ep) tarray2InsertBatch(a, idx, ep, 1, sizeof((a)->data[0])) +#define TARRAY2_APPEND_PTR(a, ep) tarray2InsertBatch(a, (a)->size, ep, 1, sizeof((a)->data[0])) +#define TARRAY2_APPEND_BATCH(a, ep, n) tarray2InsertBatch(a, (a)->size, ep, n, sizeof((a)->data[0])) +#define TARRAY2_APPEND(a, e) TARRAY2_APPEND_PTR(a, &(e)) + +// return (TYPE *) +#define TARRAY2_SEARCH(a, ep, cmp, flag) tarray2Search(a, ep, sizeof(((a)->data[0])), (__compar_fn_t)cmp, flag) + +#define TARRAY2_SEARCH_IDX(a, ep, cmp, flag) tarray2SearchIdx(a, ep, sizeof(((a)->data[0])), (__compar_fn_t)cmp, flag) + +#define TARRAY2_SORT_INSERT(a, e, cmp) tarray2SortInsert(a, &(e), sizeof(((a)->data[0])), (__compar_fn_t)cmp) +#define TARRAY2_SORT_INSERT_P(a, ep, cmp) tarray2SortInsert(a, ep, sizeof(((a)->data[0])), (__compar_fn_t)cmp) + +#define TARRAY2_REMOVE(a, idx, cb) \ + do { \ + if ((idx) < (a)->size) { \ + if (cb) { \ + TArray2Cb cb_ = (TArray2Cb)(cb); \ + cb_((a)->data + (idx)); \ + } \ + if ((idx) < (a)->size - 1) { \ + memmove((a)->data + (idx), (a)->data + (idx) + 1, sizeof((*(a)->data)) * ((a)->size - (idx)-1)); \ + } \ + (a)->size--; \ + } \ + } while (0) + +#define TARRAY2_FOREACH(a, e) for (int32_t __i = 0; __i < (a)->size && ((e) = (a)->data[__i], 1); __i++) +#define TARRAY2_FOREACH_REVERSE(a, e) 
for (int32_t __i = (a)->size - 1; __i >= 0 && ((e) = (a)->data[__i], 1); __i--) +#define TARRAY2_FOREACH_PTR(a, ep) for (int32_t __i = 0; __i < (a)->size && ((ep) = &(a)->data[__i], 1); __i++) +#define TARRAY2_FOREACH_PTR_REVERSE(a, ep) \ + for (int32_t __i = (a)->size - 1; __i >= 0 && ((ep) = &(a)->data[__i], 1); __i--) + +#ifdef __cplusplus +} +#endif + +#endif /*_TD_UTIL_TARRAY2_H_*/ diff --git a/include/util/tconfig.h b/include/util/tconfig.h index ca3c5eb0ebd35867464c5c02851a9761e111d250..ed8eedb7e574fd7f5e682f45eebb57df3328ee31 100644 --- a/include/util/tconfig.h +++ b/include/util/tconfig.h @@ -50,11 +50,17 @@ typedef enum { CFG_DTYPE_TIMEZONE } ECfgDataType; +typedef enum { + CFG_SCOPE_SERVER, + CFG_SCOPE_CLIENT, + CFG_SCOPE_BOTH +} ECfgScopeType; + typedef struct SConfigItem { ECfgSrcType stype; ECfgDataType dtype; - bool tsc; - char *name; + int8_t scope; + char *name; union { bool bval; float fval; @@ -92,20 +98,21 @@ int32_t cfgGetSize(SConfig *pCfg); SConfigItem *cfgGetItem(SConfig *pCfg, const char *name); int32_t cfgSetItem(SConfig *pCfg, const char *name, const char *value, ECfgSrcType stype); -int32_t cfgAddBool(SConfig *pCfg, const char *name, bool defaultVal, bool tsc); -int32_t cfgAddInt32(SConfig *pCfg, const char *name, int32_t defaultVal, int64_t minval, int64_t maxval, bool tsc); -int32_t cfgAddInt64(SConfig *pCfg, const char *name, int64_t defaultVal, int64_t minval, int64_t maxval, bool tsc); -int32_t cfgAddFloat(SConfig *pCfg, const char *name, float defaultVal, double minval, double maxval, bool tsc); -int32_t cfgAddString(SConfig *pCfg, const char *name, const char *defaultVal, bool tsc); -int32_t cfgAddDir(SConfig *pCfg, const char *name, const char *defaultVal, bool tsc); -int32_t cfgAddLocale(SConfig *pCfg, const char *name, const char *defaultVal); -int32_t cfgAddCharset(SConfig *pCfg, const char *name, const char *defaultVal); -int32_t cfgAddTimezone(SConfig *pCfg, const char *name, const char *defaultVal); +int32_t 
cfgAddBool(SConfig *pCfg, const char *name, bool defaultVal, int8_t scope); +int32_t cfgAddInt32(SConfig *pCfg, const char *name, int32_t defaultVal, int64_t minval, int64_t maxval, int8_t scope); +int32_t cfgAddInt64(SConfig *pCfg, const char *name, int64_t defaultVal, int64_t minval, int64_t maxval, int8_t scope); +int32_t cfgAddFloat(SConfig *pCfg, const char *name, float defaultVal, double minval, double maxval, int8_t scope); +int32_t cfgAddString(SConfig *pCfg, const char *name, const char *defaultVal, int8_t scope); +int32_t cfgAddDir(SConfig *pCfg, const char *name, const char *defaultVal, int8_t scope); +int32_t cfgAddLocale(SConfig *pCfg, const char *name, const char *defaultVal, int8_t scope); +int32_t cfgAddCharset(SConfig *pCfg, const char *name, const char *defaultVal, int8_t scope); +int32_t cfgAddTimezone(SConfig *pCfg, const char *name, const char *defaultVal, int8_t scope); const char *cfgStypeStr(ECfgSrcType type); const char *cfgDtypeStr(ECfgDataType type); void cfgDumpItemValue(SConfigItem *pItem, char *buf, int32_t bufSize, int32_t *pLen); +void cfgDumpItemScope(SConfigItem *pItem, char *buf, int32_t bufSize, int32_t *pLen); void cfgDumpCfg(SConfig *pCfg, bool tsc, bool dump); diff --git a/include/util/tdef.h b/include/util/tdef.h index 69b012ecea6227dac152fe74ad8cadf6f83e47e0..e4af88bf10291235e07ae7b18674fa064b054683 100644 --- a/include/util/tdef.h +++ b/include/util/tdef.h @@ -191,16 +191,16 @@ typedef enum ELogicConditionType { #define TSDB_MAX_COLUMNS 4096 #define TSDB_MIN_COLUMNS 2 // PRIMARY COLUMN(timestamp) + other columns -#define TSDB_NODE_NAME_LEN 64 -#define TSDB_TABLE_NAME_LEN 193 // it is a null-terminated string -#define TSDB_TOPIC_NAME_LEN 193 // it is a null-terminated string -#define TSDB_CGROUP_LEN 193 // it is a null-terminated string -#define TSDB_OFFSET_LEN 64 // it is a null-terminated string -#define TSDB_USER_CGROUP_LEN (TSDB_USER_LEN + TSDB_CGROUP_LEN) // it is a null-terminated string -#define TSDB_STREAM_NAME_LEN 
193 // it is a null-terminated string -#define TSDB_DB_NAME_LEN 65 -#define TSDB_DB_FNAME_LEN (TSDB_ACCT_ID_LEN + TSDB_DB_NAME_LEN + TSDB_NAME_DELIMITER_LEN) -#define TSDB_PRIVILEDGE_CONDITION_LEN 200 +#define TSDB_NODE_NAME_LEN 64 +#define TSDB_TABLE_NAME_LEN 193 // it is a null-terminated string +#define TSDB_TOPIC_NAME_LEN 193 // it is a null-terminated string +#define TSDB_CGROUP_LEN 193 // it is a null-terminated string +#define TSDB_OFFSET_LEN 64 // it is a null-terminated string +#define TSDB_USER_CGROUP_LEN (TSDB_USER_LEN + TSDB_CGROUP_LEN) // it is a null-terminated string +#define TSDB_STREAM_NAME_LEN 193 // it is a null-terminated string +#define TSDB_DB_NAME_LEN 65 +#define TSDB_DB_FNAME_LEN (TSDB_ACCT_ID_LEN + TSDB_DB_NAME_LEN + TSDB_NAME_DELIMITER_LEN) +#define TSDB_PRIVILEDGE_CONDITION_LEN 48*1024 #define TSDB_FUNC_NAME_LEN 65 #define TSDB_FUNC_COMMENT_LEN 1024 * 1024 @@ -249,15 +249,15 @@ typedef enum ELogicConditionType { #define TSDB_LABEL_LEN 8 #define TSDB_JOB_STATUS_LEN 32 -#define TSDB_CLUSTER_ID_LEN 40 -#define TSDB_FQDN_LEN 128 -#define TSDB_EP_LEN (TSDB_FQDN_LEN + 6) -#define TSDB_IPv4ADDR_LEN 16 -#define TSDB_FILENAME_LEN 128 -#define TSDB_SHOW_SQL_LEN 2048 +#define TSDB_CLUSTER_ID_LEN 40 +#define TSDB_FQDN_LEN 128 +#define TSDB_EP_LEN (TSDB_FQDN_LEN + 6) +#define TSDB_IPv4ADDR_LEN 16 +#define TSDB_FILENAME_LEN 128 +#define TSDB_SHOW_SQL_LEN 2048 #define TSDB_SHOW_SCHEMA_JSON_LEN TSDB_MAX_COLUMNS * 256 -#define TSDB_SLOW_QUERY_SQL_LEN 512 -#define TSDB_SHOW_SUBQUERY_LEN 1000 +#define TSDB_SLOW_QUERY_SQL_LEN 512 +#define TSDB_SHOW_SUBQUERY_LEN 1000 #define TSDB_TRANS_STAGE_LEN 12 #define TSDB_TRANS_TYPE_LEN 16 @@ -369,8 +369,13 @@ typedef enum ELogicConditionType { #define TSDB_DB_SCHEMALESS_OFF 0 #define TSDB_DEFAULT_DB_SCHEMALESS TSDB_DB_SCHEMALESS_OFF #define TSDB_MIN_STT_TRIGGER 1 -#define TSDB_MAX_STT_TRIGGER 16 -#define TSDB_DEFAULT_SST_TRIGGER 1 +#ifdef TD_ENTERPRISE +#define TSDB_MAX_STT_TRIGGER 16 +#define TSDB_DEFAULT_SST_TRIGGER 
2 +#else +#define TSDB_MAX_STT_TRIGGER 1 +#define TSDB_DEFAULT_SST_TRIGGER 1 +#endif #define TSDB_MIN_HASH_PREFIX (2 - TSDB_TABLE_NAME_LEN) #define TSDB_MAX_HASH_PREFIX (TSDB_TABLE_NAME_LEN - 2) #define TSDB_DEFAULT_HASH_PREFIX 0 @@ -379,8 +384,8 @@ typedef enum ELogicConditionType { #define TSDB_DEFAULT_HASH_SUFFIX 0 #define TSDB_DB_MIN_WAL_RETENTION_PERIOD -1 -#define TSDB_REP_DEF_DB_WAL_RET_PERIOD 0 -#define TSDB_REPS_DEF_DB_WAL_RET_PERIOD 0 +#define TSDB_REP_DEF_DB_WAL_RET_PERIOD 3600 +#define TSDB_REPS_DEF_DB_WAL_RET_PERIOD 3600 #define TSDB_DB_MIN_WAL_RETENTION_SIZE -1 #define TSDB_REP_DEF_DB_WAL_RET_SIZE 0 #define TSDB_REPS_DEF_DB_WAL_RET_SIZE 0 @@ -410,10 +415,10 @@ typedef enum ELogicConditionType { #define TSDB_EXPLAIN_RESULT_ROW_SIZE (16 * 1024) #define TSDB_EXPLAIN_RESULT_COLUMN_NAME "QUERY_PLAN" -#define TSDB_MAX_FIELD_LEN 65519 // 16384:65519 -#define TSDB_MAX_BINARY_LEN TSDB_MAX_FIELD_LEN // 16384-8:65519 -#define TSDB_MAX_NCHAR_LEN TSDB_MAX_FIELD_LEN // 16384-8:65519 -#define TSDB_MAX_GEOMETRY_LEN TSDB_MAX_FIELD_LEN // 16384-8:65519 +#define TSDB_MAX_FIELD_LEN 65519 // 16384:65519 +#define TSDB_MAX_BINARY_LEN TSDB_MAX_FIELD_LEN // 16384-8:65519 +#define TSDB_MAX_NCHAR_LEN TSDB_MAX_FIELD_LEN // 16384-8:65519 +#define TSDB_MAX_GEOMETRY_LEN TSDB_MAX_FIELD_LEN // 16384-8:65519 #define PRIMARYKEY_TIMESTAMP_COL_ID 1 #define COL_REACH_END(colId, maxColId) ((colId) > (maxColId)) @@ -492,6 +497,7 @@ enum { #define TSDB_CONFIG_OPTION_LEN 32 #define TSDB_CONFIG_VALUE_LEN 64 +#define TSDB_CONFIG_SCOPE_LEN 8 #define TSDB_CONFIG_NUMBER 8 #define QUERY_ID_SIZE 20 diff --git a/include/util/tlist.h b/include/util/tlist.h index c684e90a3318444b706720dee0b64ba220437aee..0924c133b9c77b5717ff30386f49fe63c9e506dd 100644 --- a/include/util/tlist.h +++ b/include/util/tlist.h @@ -241,6 +241,54 @@ void tdListNodeGetData(SList *list, SListNode *node, void *target); void tdListInitIter(SList *list, SListIter *pIter, TD_LIST_DIRECTION_T direction); SListNode 
*tdListNext(SListIter *pIter); +// macros ==================================================================================== + +// q: for queue +// n: for node +// m: for member + +#define LISTD(TYPE) \ + struct { \ + TYPE *next, *prev; \ + } + +#define LISTD_NEXT(n, m) ((n)->m.next) +#define LISTD_PREV(n, m) ((n)->m.prev) +#define LISTD_INIT(q, m) (LISTD_NEXT(q, m) = LISTD_PREV(q, m) = (q)) +#define LISTD_HEAD(q, m) (LISTD_NEXT(q, m)) +#define LISTD_TAIL(q, m) (LISTD_PREV(q, m)) +#define LISTD_PREV_NEXT(n, m) (LISTD_NEXT(LISTD_PREV(n, m), m)) +#define LISTD_NEXT_PREV(n, m) (LISTD_PREV(LISTD_NEXT(n, m), m)) + +#define LISTD_INSERT_HEAD(q, n, m) \ + do { \ + LISTD_NEXT(n, m) = LISTD_NEXT(q, m); \ + LISTD_PREV(n, m) = (q); \ + LISTD_NEXT_PREV(n, m) = (n); \ + LISTD_NEXT(q, m) = (n); \ + } while (0) + +#define LISTD_INSERT_TAIL(q, n, m) \ + do { \ + LISTD_NEXT(n, m) = (q); \ + LISTD_PREV(n, m) = LISTD_PREV(q, m); \ + LISTD_PREV_NEXT(n, m) = (n); \ + LISTD_PREV(q, m) = (n); \ + } while (0) + +#define LISTD_REMOVE(n, m) \ + do { \ + LISTD_PREV_NEXT(n, m) = LISTD_NEXT(n, m); \ + LISTD_NEXT_PREV(n, m) = LISTD_PREV(n, m); \ + } while (0) + +#define LISTD_FOREACH(q, n, m) for ((n) = LISTD_HEAD(q, m); (n) != (q); (n) = LISTD_NEXT(n, m)) +#define LISTD_FOREACH_REVERSE(q, n, m) for ((n) = LISTD_TAIL(q, m); (n) != (q); (n) = LISTD_PREV(n, m)) +#define LISTD_FOREACH_SAFE(q, n, t, m) \ + for ((n) = LISTD_HEAD(q, m), (t) = LISTD_NEXT(n, m); (n) != (q); (n) = (t), (t) = LISTD_NEXT(n, m)) +#define LISTD_FOREACH_REVERSE_SAFE(q, n, t, m) \ + for ((n) = LISTD_TAIL(q, m), (t) = LISTD_PREV(n, m); (n) != (q); (n) = (t), (t) = LISTD_PREV(n, m)) + #ifdef __cplusplus } #endif diff --git a/include/util/trbtree.h b/include/util/trbtree.h index e2264194401bee65a22f0301f0339cb76a8d7356..8353a91f0acc3b5ced557ef594572f7db53c64fd 100644 --- a/include/util/trbtree.h +++ b/include/util/trbtree.h @@ -39,7 +39,7 @@ void tRBTreeDrop(SRBTree *pTree, SRBTreeNode *z); SRBTreeNode 
*tRBTreeDropByKey(SRBTree *pTree, void *pKey); SRBTreeNode *tRBTreeDropMin(SRBTree *pTree); SRBTreeNode *tRBTreeDropMax(SRBTree *pTree); -SRBTreeNode *tRBTreeGet(SRBTree *pTree, const SRBTreeNode *pKeyNode); +SRBTreeNode *tRBTreeGet(const SRBTree *pTree, const SRBTreeNode *pKeyNode); // SRBTreeIter ============================================= #define tRBTreeIterCreate(tree, ascend) \ @@ -67,9 +67,9 @@ struct SRBTree { }; struct SRBTreeIter { - int8_t asc; - SRBTree *pTree; - SRBTreeNode *pNode; + int8_t asc; + const SRBTree *pTree; + SRBTreeNode *pNode; }; #ifdef __cplusplus diff --git a/include/util/tutil.h b/include/util/tutil.h index 7a59aa170aa0f0011ec55cbb5507047c6c5d2a21..a2cfa4cfe56f0c5a6021327f6461bde812ebc46e 100644 --- a/include/util/tutil.h +++ b/include/util/tutil.h @@ -29,7 +29,7 @@ extern "C" { int32_t strdequote(char *src); size_t strtrim(char *src); char *strnchr(const char *haystack, char needle, int32_t len, bool skipquote); -TdUcs4* wcsnchr(const TdUcs4* haystack, TdUcs4 needle, size_t len); +TdUcs4 *wcsnchr(const TdUcs4 *haystack, TdUcs4 needle, size_t len); char **strsplit(char *src, const char *delim, int32_t *num); char *strtolower(char *dst, const char *src); @@ -37,11 +37,11 @@ char *strntolower(char *dst, const char *src, int32_t n); char *strntolower_s(char *dst, const char *src, int32_t n); int64_t strnatoi(char *num, int32_t len); -size_t tstrncspn(const char *str, size_t ssize, const char *reject, size_t rsize); -size_t twcsncspn(const TdUcs4 *wcs, size_t size, const TdUcs4 *reject, size_t rsize); +size_t tstrncspn(const char *str, size_t ssize, const char *reject, size_t rsize); +size_t twcsncspn(const TdUcs4 *wcs, size_t size, const TdUcs4 *reject, size_t rsize); -char *strbetween(char *string, char *begin, char *end); -char *paGetToken(char *src, char **token, int32_t *tokenLen); +char *strbetween(char *string, char *begin, char *end); +char *paGetToken(char *src, char **token, int32_t *tokenLen); int32_t taosByteArrayToHexStr(char 
bytes[], int32_t len, char hexstr[]); int32_t taosHexStrToByteArray(char hexstr[], char bytes[]); @@ -81,12 +81,13 @@ static FORCE_INLINE void taosEncryptPass_c(uint8_t *inBuf, size_t len, char *tar static FORCE_INLINE int32_t taosGetTbHashVal(const char *tbname, int32_t tblen, int32_t method, int32_t prefix, int32_t suffix) { - if ((prefix == 0 && suffix == 0) || (tblen <= (prefix + suffix)) || (tblen <= -1 * (prefix + suffix)) || prefix * suffix < 0) { + if ((prefix == 0 && suffix == 0) || (tblen <= (prefix + suffix)) || (tblen <= -1 * (prefix + suffix)) || + prefix * suffix < 0) { return MurmurHash3_32(tbname, tblen); } else if (prefix > 0 || suffix > 0) { return MurmurHash3_32(tbname + prefix, tblen - prefix - suffix); } else { - char tbName[TSDB_TABLE_FNAME_LEN]; + char tbName[TSDB_TABLE_FNAME_LEN]; int32_t offset = 0; if (prefix < 0) { offset = -1 * prefix; @@ -94,20 +95,33 @@ static FORCE_INLINE int32_t taosGetTbHashVal(const char *tbname, int32_t tblen, } if (suffix < 0) { strncpy(tbName + offset, tbname + tblen + suffix, -1 * suffix); - offset += -1 *suffix; + offset += -1 * suffix; } return MurmurHash3_32(tbName, offset); } } #define TSDB_CHECK_CODE(CODE, LINO, LABEL) \ - if (CODE) { \ - LINO = __LINE__; \ - goto LABEL; \ + do { \ + if ((CODE)) { \ + LINO = __LINE__; \ + goto LABEL; \ + } \ + } while (0) + +#define TSDB_CHECK_NULL(ptr, CODE, LINO, LABEL, ERRNO) \ + if ((ptr) == NULL) { \ + (CODE) = (ERRNO); \ + (LINO) = __LINE__; \ + goto LABEL; \ } +#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) + #define VND_CHECK_CODE(CODE, LINO, LABEL) TSDB_CHECK_CODE(CODE, LINO, LABEL) +#define TCONTAINER_OF(ptr, type, member) ((type *)((char *)(ptr)-offsetof(type, member))) + #ifdef __cplusplus } #endif diff --git a/packaging/tools/install.sh b/packaging/tools/install.sh index f311714f3dd0c55cba3d7473521f2b2b560d23d7..961631561e167dd6b78677dea807444de523a2ae 100755 --- a/packaging/tools/install.sh +++ b/packaging/tools/install.sh @@ -613,12 +613,6 @@ function 
install_examples() { fi } -function install_web() { - if [ -d "${script_dir}/share" ]; then - ${csudo}cp -rf ${script_dir}/share/* ${install_main_dir}/share > /dev/null 2>&1 ||: - fi -} - function clean_service_on_sysvinit() { if ps aux | grep -v grep | grep ${serverName2} &>/dev/null; then @@ -894,7 +888,6 @@ function updateProduct() { fi install_examples - install_web if [ -z $1 ]; then install_bin install_service @@ -907,20 +900,22 @@ function updateProduct() { echo echo -e "${GREEN_DARK}To configure ${productName2} ${NC}: edit ${cfg_install_dir}/${configFile2}" [ -f ${configDir}/${clientName2}adapter.toml ] && [ -f ${installDir}/bin/${clientName2}adapter ] && \ - echo -e "${GREEN_DARK}To configure ${clientName2} Adapter ${NC}: edit ${configDir}/${clientName2}adapter.toml" + echo -e "${GREEN_DARK}To configure ${clientName2}Adapter ${NC}: edit ${configDir}/${clientName2}adapter.toml" if ((${service_mod} == 0)); then echo -e "${GREEN_DARK}To start ${productName2} ${NC}: ${csudo}systemctl start ${serverName2}${NC}" [ -f ${service_config_dir}/${clientName2}adapter.service ] && [ -f ${installDir}/bin/${clientName2}adapter ] && \ - echo -e "${GREEN_DARK}To start ${clientName2} Adapter ${NC}: ${csudo}systemctl start ${clientName2}adapter ${NC}" + echo -e "${GREEN_DARK}To start ${clientName2}Adapter ${NC}: ${csudo}systemctl start ${clientName2}adapter ${NC}" elif ((${service_mod} == 1)); then echo -e "${GREEN_DARK}To start ${productName2} ${NC}: ${csudo}service ${serverName2} start${NC}" [ -f ${service_config_dir}/${clientName2}adapter.service ] && [ -f ${installDir}/bin/${clientName2}adapter ] && \ - echo -e "${GREEN_DARK}To start ${clientName2} Adapter ${NC}: ${csudo}service ${clientName2}adapter start${NC}" + echo -e "${GREEN_DARK}To start ${clientName2}Adapter ${NC}: ${csudo}service ${clientName2}adapter start${NC}" else echo -e "${GREEN_DARK}To start ${productName2} ${NC}: ./${serverName2}${NC}" [ -f ${installDir}/bin/${clientName2}adapter ] && \ - echo -e 
"${GREEN_DARK}To start ${clientName2} Adapter ${NC}: ${clientName2}adapter &${NC}" + echo -e "${GREEN_DARK}To start ${clientName2}Adapter ${NC}: ${clientName2}adapter &${NC}" fi + + echo -e "${GREEN_DARK}To enable ${clientName2}keeper ${NC}: sudo systemctl enable ${clientName2}keeper &${NC}" if [ ${openresty_work} = 'true' ]; then echo -e "${GREEN_DARK}To access ${productName2} ${NC}: use ${GREEN_UNDERLINE}${clientName2} -h $serverFqdn${NC} in shell OR from ${GREEN_UNDERLINE}http://127.0.0.1:${web_port}${NC}" @@ -934,6 +929,7 @@ function updateProduct() { fi echo echo -e "\033[44;32;1m${productName2} is updated successfully!${NC}" + echo -e "\033[44;32;1mTo manage ${productName2} instance, view documentation and explorer features, you need to install ${clientName2}Explorer ${NC}" else install_bin install_config @@ -971,8 +967,7 @@ function installProduct() { if [ "$verMode" == "cluster" ]; then install_connector fi - install_examples - install_web + install_examples if [ -z $1 ]; then # install service and client # For installing new @@ -989,21 +984,23 @@ function installProduct() { echo echo -e "${GREEN_DARK}To configure ${productName2} ${NC}: edit ${cfg_install_dir}/${configFile2}" [ -f ${configDir}/${clientName2}adapter.toml ] && [ -f ${installDir}/bin/${clientName2}adapter ] && \ - echo -e "${GREEN_DARK}To configure ${clientName2} Adapter ${NC}: edit ${configDir}/${clientName2}adapter.toml" + echo -e "${GREEN_DARK}To configure ${clientName2}Adapter ${NC}: edit ${configDir}/${clientName2}adapter.toml" if ((${service_mod} == 0)); then echo -e "${GREEN_DARK}To start ${productName2} ${NC}: ${csudo}systemctl start ${serverName2}${NC}" [ -f ${service_config_dir}/${clientName2}adapter.service ] && [ -f ${installDir}/bin/${clientName2}adapter ] && \ - echo -e "${GREEN_DARK}To start ${clientName2} Adapter ${NC}: ${csudo}systemctl start ${clientName2}adapter ${NC}" + echo -e "${GREEN_DARK}To start ${clientName2}Adapter ${NC}: ${csudo}systemctl start ${clientName2}adapter 
${NC}" elif ((${service_mod} == 1)); then echo -e "${GREEN_DARK}To start ${productName2} ${NC}: ${csudo}service ${serverName2} start${NC}" [ -f ${service_config_dir}/${clientName2}adapter.service ] && [ -f ${installDir}/bin/${clientName2}adapter ] && \ - echo -e "${GREEN_DARK}To start ${clientName2} Adapter ${NC}: ${csudo}service ${clientName2}adapter start${NC}" + echo -e "${GREEN_DARK}To start ${clientName2}Adapter ${NC}: ${csudo}service ${clientName2}adapter start${NC}" else echo -e "${GREEN_DARK}To start ${productName2} ${NC}: ${serverName2}${NC}" [ -f ${installDir}/bin/${clientName2}adapter ] && \ - echo -e "${GREEN_DARK}To start ${clientName2} Adapter ${NC}: ${clientName2}adapter &${NC}" + echo -e "${GREEN_DARK}To start ${clientName2}Adapter ${NC}: ${clientName2}adapter &${NC}" fi + echo -e "${GREEN_DARK}To enable ${clientName2}keeper ${NC}: sudo systemctl enable ${clientName2}keeper &${NC}" + if [ ! -z "$firstEp" ]; then tmpFqdn=${firstEp%%:*} substr=":" @@ -1025,6 +1022,7 @@ function installProduct() { fi echo -e "\033[44;32;1m${productName2} is installed successfully!${NC}" + echo -e "\033[44;32;1mTo manage ${productName2} instance, view documentation and explorer features, you need to install ${clientName2}Explorer ${NC}" echo else # Only install client install_bin diff --git a/packaging/tools/install_client.sh b/packaging/tools/install_client.sh index 8b845ca8f4d1fe6f0afcfac6e8f23326e30e4af0..18ebf9dc8fcf051302933788d3f19aa2cc2b5e91 100755 --- a/packaging/tools/install_client.sh +++ b/packaging/tools/install_client.sh @@ -267,7 +267,9 @@ function install_log() { } function install_connector() { - ${csudo}cp -rf ${script_dir}/connector/ ${install_main_dir}/ + if [ -d ${script_dir}/connector ]; then + ${csudo}cp -rf ${script_dir}/connector/ ${install_main_dir}/ + fi } function install_examples() { diff --git a/packaging/tools/make_install.bat b/packaging/tools/make_install.bat index 
41113c9ae45ca73c9623bffff9b921fdf4525f3f..0b2a55b89c231f2bec5ff9499b7b9909b0dc2317 100644 --- a/packaging/tools/make_install.bat +++ b/packaging/tools/make_install.bat @@ -56,8 +56,8 @@ copy %binary_dir%\\build\\bin\\taos.exe %target_dir% > nul if exist %binary_dir%\\build\\bin\\taosBenchmark.exe ( copy %binary_dir%\\build\\bin\\taosBenchmark.exe %target_dir% > nul ) -if exist %binary_dir%\\build\\lib\\taosws.dll.lib ( - copy %binary_dir%\\build\\lib\\taosws.dll.lib %target_dir%\\driver > nul +if exist %binary_dir%\\build\\lib\\taosws.lib ( + copy %binary_dir%\\build\\lib\\taosws.lib %target_dir%\\driver > nul ) if exist %binary_dir%\\build\\lib\\taosws.dll ( copy %binary_dir%\\build\\lib\\taosws.dll %target_dir%\\driver > nul diff --git a/packaging/tools/make_install.sh b/packaging/tools/make_install.sh index c5c70e0aa28696732a47d5a75ad3d38c2e3a0ec3..0a5f9d26683ff7eba2cc3070f4668beacbeaa2b6 100755 --- a/packaging/tools/make_install.sh +++ b/packaging/tools/make_install.sh @@ -432,12 +432,6 @@ function install_examples() { ${csudo}cp -rf ${source_dir}/examples/* ${install_main_dir}/examples || : } -function install_web() { - if [ -d "${binary_dir}/build/share" ]; then - ${csudo}cp -rf ${binary_dir}/build/share/* ${install_main_dir}/share || : - fi -} - function clean_service_on_sysvinit() { if ps aux | grep -v grep | grep ${serverName} &>/dev/null; then ${csudo}service ${serverName} stop || : @@ -592,7 +586,6 @@ function update_TDengine() { install_lib # install_connector install_examples - install_web install_bin install_app diff --git a/packaging/tools/makepkg.sh b/packaging/tools/makepkg.sh index 6c389502b76cfae68df9882f7ce4713e8c850c01..a48d264d5debb9b6c771ed52ca07a59239b9e05d 100755 --- a/packaging/tools/makepkg.sh +++ b/packaging/tools/makepkg.sh @@ -126,7 +126,6 @@ else fi install_files="${script_dir}/install.sh" -web_dir="${top_dir}/../enterprise/src/plugins/web" init_file_deb=${script_dir}/../deb/taosd init_file_rpm=${script_dir}/../rpm/taosd @@ -320,17 
+319,6 @@ if [[ $dbName == "taos" ]]; then mkdir -p ${install_dir}/examples/taosbenchmark-json && cp ${examples_dir}/../tools/taos-tools/example/* ${install_dir}/examples/taosbenchmark-json fi - # Add web files - if [ "$verMode" == "cluster" ] || [ "$verMode" == "cloud" ]; then - if [ -d "${web_dir}/admin" ] ; then - mkdir -p ${install_dir}/share/ - cp -Rfap ${web_dir}/admin ${install_dir}/share/ - cp ${web_dir}/png/taos.png ${install_dir}/share/admin/images/taos.png - cp -rf ${build_dir}/share/{etc,srv} ${install_dir}/share ||: - else - echo "directory not found for enterprise release: ${web_dir}/admin" - fi - fi fi # Copy driver diff --git a/source/client/inc/clientInt.h b/source/client/inc/clientInt.h index 736582dff2595485819d36a6b7b44dccd6440145..aa7caaaba3c7453dea3adb604b0e64e5a60ec231 100644 --- a/source/client/inc/clientInt.h +++ b/source/client/inc/clientInt.h @@ -46,9 +46,10 @@ enum { RES_TYPE__TMQ_METADATA, }; -#define SHOW_VARIABLES_RESULT_COLS 2 +#define SHOW_VARIABLES_RESULT_COLS 3 #define SHOW_VARIABLES_RESULT_FIELD1_LEN (TSDB_CONFIG_OPTION_LEN + VARSTR_HEADER_SIZE) #define SHOW_VARIABLES_RESULT_FIELD2_LEN (TSDB_CONFIG_VALUE_LEN + VARSTR_HEADER_SIZE) +#define SHOW_VARIABLES_RESULT_FIELD3_LEN (TSDB_CONFIG_SCOPE_LEN + VARSTR_HEADER_SIZE) #define TD_RES_QUERY(res) (*(int8_t*)res == RES_TYPE__QUERY) #define TD_RES_TMQ(res) (*(int8_t*)res == RES_TYPE__TMQ) diff --git a/source/client/inc/clientSml.h b/source/client/inc/clientSml.h index c9eb95101495abda29f88e6876464560b94b849a..040064560c20ea267d8d005543a54465d916ee6a 100644 --- a/source/client/inc/clientSml.h +++ b/source/client/inc/clientSml.h @@ -64,8 +64,8 @@ extern "C" { #define IS_INVALID_COL_LEN(len) ((len) <= 0 || (len) >= TSDB_COL_NAME_LEN) #define IS_INVALID_TABLE_LEN(len) ((len) <= 0 || (len) >= TSDB_TABLE_NAME_LEN) -#define TS "_ts" -#define TS_LEN 3 +//#define TS "_ts" +//#define TS_LEN 3 #define VALUE "_value" #define VALUE_LEN 6 @@ -258,6 +258,7 @@ int32_t smlParseInfluxString(SSmlHandle 
*info, char *sql, char *sqlEnd, SSmlLine int32_t smlParseTelnetString(SSmlHandle *info, char *sql, char *sqlEnd, SSmlLineInfo *elements); int32_t smlParseJSON(SSmlHandle *info, char *payload); +void smlStrReplace(char* src, int32_t len); #ifdef __cplusplus } #endif diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index 14d6394fc459e8e6302f937364734faf360afd31..d448dd1edf7e49c34f0a736bece4b8febad6c96c 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -1297,13 +1297,19 @@ int initEpSetFromCfg(const char* firstEp, const char* secondEp, SCorEpSet* pEpSe return -1; } - int32_t code = taosGetFqdnPortFromEp(firstEp, &mgmtEpSet->eps[0]); + int32_t code = taosGetFqdnPortFromEp(firstEp, &mgmtEpSet->eps[mgmtEpSet->numOfEps]); if (code != TSDB_CODE_SUCCESS) { terrno = TSDB_CODE_TSC_INVALID_FQDN; return terrno; } - - mgmtEpSet->numOfEps++; + uint32_t addr = taosGetIpv4FromFqdn(mgmtEpSet->eps[mgmtEpSet->numOfEps].fqdn); + if (addr == 0xffffffff) { + tscError("failed to resolve firstEp fqdn: %s, code:%s", mgmtEpSet->eps[mgmtEpSet->numOfEps].fqdn, + tstrerror(TSDB_CODE_TSC_INVALID_FQDN)); + memset(&(mgmtEpSet->eps[mgmtEpSet->numOfEps]), 0, sizeof(mgmtEpSet->eps[mgmtEpSet->numOfEps])); + } else { + mgmtEpSet->numOfEps++; + } } if (secondEp && secondEp[0] != 0) { @@ -1313,12 +1319,19 @@ int initEpSetFromCfg(const char* firstEp, const char* secondEp, SCorEpSet* pEpSe } taosGetFqdnPortFromEp(secondEp, &mgmtEpSet->eps[mgmtEpSet->numOfEps]); - mgmtEpSet->numOfEps++; + uint32_t addr = taosGetIpv4FromFqdn(mgmtEpSet->eps[mgmtEpSet->numOfEps].fqdn); + if (addr == 0xffffffff) { + tscError("failed to resolve secondEp fqdn: %s, code:%s", mgmtEpSet->eps[mgmtEpSet->numOfEps].fqdn, + tstrerror(TSDB_CODE_TSC_INVALID_FQDN)); + memset(&(mgmtEpSet->eps[mgmtEpSet->numOfEps]), 0, sizeof(mgmtEpSet->eps[mgmtEpSet->numOfEps])); + } else { + mgmtEpSet->numOfEps++; + } } if (mgmtEpSet->numOfEps == 0) { - terrno = TSDB_CODE_TSC_INVALID_FQDN; - 
return -1; + terrno = TSDB_CODE_RPC_NETWORK_UNAVAIL; + return TSDB_CODE_RPC_NETWORK_UNAVAIL; } return 0; diff --git a/source/client/src/clientMsgHandler.c b/source/client/src/clientMsgHandler.c index 9ab618cf3a0cc737e37297d92d0a8c42b524f9dc..9f9809b2274592fcb9bd75ba32907a040a4a7a5d 100644 --- a/source/client/src/clientMsgHandler.c +++ b/source/client/src/clientMsgHandler.c @@ -99,13 +99,20 @@ int32_t processConnectRsp(void* param, SDataBuf* pMsg, int32_t code) { goto End; } + int updateEpSet = 1; if (connectRsp.dnodeNum == 1) { SEpSet srcEpSet = getEpSet_s(&pTscObj->pAppInfo->mgmtEp); SEpSet dstEpSet = connectRsp.epSet; - rpcSetDefaultAddr(pTscObj->pAppInfo->pTransporter, srcEpSet.eps[srcEpSet.inUse].fqdn, - dstEpSet.eps[dstEpSet.inUse].fqdn); - } else if (connectRsp.dnodeNum > 1 && !isEpsetEqual(&pTscObj->pAppInfo->mgmtEp.epSet, &connectRsp.epSet)) { - SEpSet* pOrig = &pTscObj->pAppInfo->mgmtEp.epSet; + if (srcEpSet.numOfEps == 1) { + rpcSetDefaultAddr(pTscObj->pAppInfo->pTransporter, srcEpSet.eps[srcEpSet.inUse].fqdn, + dstEpSet.eps[dstEpSet.inUse].fqdn); + updateEpSet = 0; + } + } + if (updateEpSet == 1 && !isEpsetEqual(&pTscObj->pAppInfo->mgmtEp.epSet, &connectRsp.epSet)) { + SEpSet corEpSet = getEpSet_s(&pTscObj->pAppInfo->mgmtEp); + + SEpSet* pOrig = &corEpSet; SEp* pOrigEp = &pOrig->eps[pOrig->inUse]; SEp* pNewEp = &connectRsp.epSet.eps[connectRsp.epSet.inUse]; tscDebug("mnode epset updated from %d/%d=>%s:%d to %d/%d=>%s:%d in connRsp", pOrig->inUse, pOrig->numOfEps, @@ -428,13 +435,16 @@ static int32_t buildShowVariablesBlock(SArray* pVars, SSDataBlock** block) { SColumnInfoData infoData = {0}; infoData.info.type = TSDB_DATA_TYPE_VARCHAR; infoData.info.bytes = SHOW_VARIABLES_RESULT_FIELD1_LEN; - taosArrayPush(pBlock->pDataBlock, &infoData); infoData.info.type = TSDB_DATA_TYPE_VARCHAR; infoData.info.bytes = SHOW_VARIABLES_RESULT_FIELD2_LEN; taosArrayPush(pBlock->pDataBlock, &infoData); + infoData.info.type = TSDB_DATA_TYPE_VARCHAR; + infoData.info.bytes = 
SHOW_VARIABLES_RESULT_FIELD3_LEN; + taosArrayPush(pBlock->pDataBlock, &infoData); + int32_t numOfCfg = taosArrayGetSize(pVars); blockDataEnsureCapacity(pBlock, numOfCfg); @@ -450,6 +460,11 @@ static int32_t buildShowVariablesBlock(SArray* pVars, SSDataBlock** block) { STR_WITH_MAXSIZE_TO_VARSTR(value, pInfo->value, TSDB_CONFIG_VALUE_LEN + VARSTR_HEADER_SIZE); pColInfo = taosArrayGet(pBlock->pDataBlock, c++); colDataSetVal(pColInfo, i, value, false); + + char scope[TSDB_CONFIG_SCOPE_LEN + VARSTR_HEADER_SIZE] = {0}; + STR_WITH_MAXSIZE_TO_VARSTR(scope, pInfo->scope, TSDB_CONFIG_SCOPE_LEN + VARSTR_HEADER_SIZE); + pColInfo = taosArrayGet(pBlock->pDataBlock, c++); + colDataSetVal(pColInfo, i, scope, false); } pBlock->info.rows = numOfCfg; diff --git a/source/client/src/clientRawBlockWrite.c b/source/client/src/clientRawBlockWrite.c index 90b10e0920e83ffe68103074a51d017531f3c04d..dd311db1267967a6ca657346bdf95095aeec15fb 100644 --- a/source/client/src/clientRawBlockWrite.c +++ b/source/client/src/clientRawBlockWrite.c @@ -1327,6 +1327,9 @@ end: int taos_write_raw_block_with_fields(TAOS* taos, int rows, char* pData, const char* tbname, TAOS_FIELD* fields, int numFields) { + if (!taos || !pData || !tbname) { + return TSDB_CODE_INVALID_PARA; + } int32_t code = TSDB_CODE_SUCCESS; STableMeta* pTableMeta = NULL; SQuery* pQuery = NULL; @@ -1413,6 +1416,9 @@ end: } int taos_write_raw_block(TAOS* taos, int rows, char* pData, const char* tbname) { + if (!taos || !pData || !tbname) { + return TSDB_CODE_INVALID_PARA; + } int32_t code = TSDB_CODE_SUCCESS; STableMeta* pTableMeta = NULL; SQuery* pQuery = NULL; @@ -1812,6 +1818,7 @@ end: } char* tmq_get_json_meta(TAOS_RES* res) { + if (res == NULL) return NULL; uDebug("tmq_get_json_meta called"); if (!TD_RES_TMQ_META(res) && !TD_RES_TMQ_METADATA(res)) { return NULL; diff --git a/source/client/src/clientSml.c b/source/client/src/clientSml.c index 13dc019feb29892fd1b48bf7fb8051f1da216652..ffff3df5d02797c0dd6d289e868a12a046d2d9f4 100644 --- 
a/source/client/src/clientSml.c +++ b/source/client/src/clientSml.c @@ -104,7 +104,7 @@ static int32_t smlCheckAuth(SSmlHandle *info, SRequestConnInfo* conn, const cha SUserAuthRes authRes = {0}; code = catalogChkAuth(info->pCatalog, conn, &pAuth, &authRes); - + nodesDestroyNode(authRes.pCond); return (code == TSDB_CODE_SUCCESS) ? (authRes.pass ? TSDB_CODE_SUCCESS : TSDB_CODE_PAR_PERMISSION_DENIED) : code; @@ -114,6 +114,15 @@ inline bool smlDoubleToInt64OverFlow(double num) { return false; } +void smlStrReplace(char* src, int32_t len){ + if (!tsSmlDot2Underline) return; + for(int i = 0; i < len; i++){ + if(src[i] == '.'){ + src[i] = '_'; + } + } +} + int32_t smlBuildInvalidDataMsg(SSmlMsgBuf *pBuf, const char *msg1, const char *msg2) { if (pBuf->buf) { memset(pBuf->buf, 0, pBuf->len); @@ -193,6 +202,9 @@ static int32_t smlParseTableName(SArray *tags, char *childTableName) { if (childTableNameLen == tag->keyLen && strncmp(tag->key, tsSmlChildTableName, tag->keyLen) == 0) { memset(childTableName, 0, TSDB_TABLE_NAME_LEN); strncpy(childTableName, tag->value, (tag->length < TSDB_TABLE_NAME_LEN ? 
tag->length : TSDB_TABLE_NAME_LEN)); + if(tsSmlDot2Underline){ + smlStrReplace(childTableName, strlen(childTableName)); + } taosArrayRemove(tags, i); break; } @@ -838,6 +850,7 @@ static int32_t smlModifyDBSchemas(SSmlHandle *info) { char *measure = taosMemoryMalloc(superTableLen); memcpy(measure, superTable, superTableLen); PROCESS_SLASH_IN_MEASUREMENT(measure, superTableLen); + smlStrReplace(measure, superTableLen); memset(pName.tname, 0, TSDB_TABLE_NAME_LEN); memcpy(pName.tname, measure, superTableLen); taosMemoryFree(measure); @@ -1051,7 +1064,8 @@ static int32_t smlModifyDBSchemas(SSmlHandle *info) { taosMemoryFreeClear(sTableData->tableMeta); sTableData->tableMeta = pTableMeta; uDebug("SML:0x%" PRIx64 "modify schema uid:%" PRIu64 ", sversion:%d, tversion:%d", info->id, pTableMeta->uid, - pTableMeta->sversion, pTableMeta->tversion) tmp = (SSmlSTableMeta **)taosHashIterate(info->superTables, tmp); + pTableMeta->sversion, pTableMeta->tversion); + tmp = (SSmlSTableMeta **)taosHashIterate(info->superTables, tmp); } uDebug("SML:0x%" PRIx64 " smlModifyDBSchemas end success, format:%d, needModifySchema:%d", info->id, info->dataFormat, info->needModifySchema); @@ -1394,7 +1408,14 @@ static int32_t smlInsertData(SSmlHandle *info) { SSmlTableInfo **oneTable = (SSmlTableInfo **)taosHashIterate(info->childTables, NULL); while (oneTable) { SSmlTableInfo *tableData = *oneTable; - tstrncpy(pName.tname, tableData->sTableName, tableData->sTableNameLen + 1); + + int measureLen = tableData->sTableNameLen; + char *measure = (char *)taosMemoryMalloc(tableData->sTableNameLen); + memcpy(measure, tableData->sTableName, tableData->sTableNameLen); + PROCESS_SLASH_IN_MEASUREMENT(measure, measureLen); + smlStrReplace(measure, measureLen); + memset(pName.tname, 0, TSDB_TABLE_NAME_LEN); + memcpy(pName.tname, measure, measureLen); if (info->pRequest->tableList == NULL) { info->pRequest->tableList = taosArrayInit(1, sizeof(SName)); @@ -1411,6 +1432,7 @@ static int32_t smlInsertData(SSmlHandle 
*info) { code = smlCheckAuth(info, &conn, pName.tname, AUTH_TYPE_WRITE); if(code != TSDB_CODE_SUCCESS){ + taosMemoryFree(measure); return code; } @@ -1418,6 +1440,7 @@ static int32_t smlInsertData(SSmlHandle *info) { code = catalogGetTableHashVgroup(info->pCatalog, &conn, &pName, &vg); if (code != TSDB_CODE_SUCCESS) { uError("SML:0x%" PRIx64 " catalogGetTableHashVgroup failed. table name: %s", info->id, tableData->childTableName); + taosMemoryFree(measure); return code; } taosHashPut(info->pVgHash, (const char *)&vg.vgId, sizeof(vg.vgId), (char *)&vg, sizeof(vg)); @@ -1426,6 +1449,7 @@ static int32_t smlInsertData(SSmlHandle *info) { (SSmlSTableMeta **)taosHashGet(info->superTables, tableData->sTableName, tableData->sTableNameLen); if (unlikely(NULL == pMeta || NULL == (*pMeta)->tableMeta)) { uError("SML:0x%" PRIx64 " NULL == pMeta. table name: %s", info->id, tableData->childTableName); + taosMemoryFree(measure); return TSDB_CODE_SML_INTERNAL_ERROR; } @@ -1435,11 +1459,6 @@ static int32_t smlInsertData(SSmlHandle *info) { uDebug("SML:0x%" PRIx64 " smlInsertData table:%s, uid:%" PRIu64 ", format:%d", info->id, pName.tname, tableData->uid, info->dataFormat); - int measureLen = tableData->sTableNameLen; - char *measure = (char *)taosMemoryMalloc(tableData->sTableNameLen); - memcpy(measure, tableData->sTableName, tableData->sTableNameLen); - PROCESS_SLASH_IN_MEASUREMENT(measure, measureLen); - code = smlBindData(info->pQuery, info->dataFormat, tableData->tags, (*pMeta)->cols, tableData->cols, (*pMeta)->tableMeta, tableData->childTableName, measure, measureLen, info->ttl, info->msgBuf.buf, info->msgBuf.len); diff --git a/source/client/src/clientSmlJson.c b/source/client/src/clientSmlJson.c index 0f59505f8c02c64543ebb674c8ad86ae996a3f0f..76794fd187f6c06d2297b5be0b57547d516e2402 100644 --- a/source/client/src/clientSmlJson.c +++ b/source/client/src/clientSmlJson.c @@ -456,7 +456,7 @@ int smlJsonParseObj(char **start, SSmlLineInfo *element, int8_t *offset) { static inline 
int32_t smlParseMetricFromJSON(SSmlHandle *info, cJSON *metric, SSmlLineInfo *elements) { elements->measureLen = strlen(metric->valuestring); if (IS_INVALID_TABLE_LEN(elements->measureLen)) { - uError("OTD:0x%" PRIx64 " Metric lenght is 0 or large than 192", info->id); + uError("OTD:0x%" PRIx64 " Metric length is 0 or large than 192", info->id); return TSDB_CODE_TSC_INVALID_TABLE_ID_LENGTH; } @@ -996,8 +996,8 @@ static int32_t smlParseJSONStringExt(SSmlHandle *info, cJSON *root, SSmlLineInfo uError("OTD:0x%" PRIx64 " Unable to parse timestamp from JSON payload", info->id); return TSDB_CODE_INVALID_TIMESTAMP; } - SSmlKv kvTs = {.key = TS, - .keyLen = TS_LEN, + SSmlKv kvTs = {.key = tsSmlTsDefaultName, + .keyLen = strlen(tsSmlTsDefaultName), .type = TSDB_DATA_TYPE_TIMESTAMP, .i = ts, .length = (size_t)tDataTypes[TSDB_DATA_TYPE_TIMESTAMP].bytes}; @@ -1200,8 +1200,8 @@ static int32_t smlParseJSONString(SSmlHandle *info, char **start, SSmlLineInfo * return TSDB_CODE_INVALID_TIMESTAMP; } } - SSmlKv kvTs = {.key = TS, - .keyLen = TS_LEN, + SSmlKv kvTs = {.key = tsSmlTsDefaultName, + .keyLen = strlen(tsSmlTsDefaultName), .type = TSDB_DATA_TYPE_TIMESTAMP, .i = ts, .length = (size_t)tDataTypes[TSDB_DATA_TYPE_TIMESTAMP].bytes}; diff --git a/source/client/src/clientSmlLine.c b/source/client/src/clientSmlLine.c index c5832ce1ce526021696cdbd0d850fa93515f9add..1ee2cfbedf38c17fb557296505323569f590eddc 100644 --- a/source/client/src/clientSmlLine.c +++ b/source/client/src/clientSmlLine.c @@ -157,6 +157,7 @@ static int32_t smlParseTagKv(SSmlHandle *info, char **sql, char *sqlEnd, SSmlLin measure = (char *)taosMemoryMalloc(currElement->measureLen); memcpy(measure, currElement->measure, currElement->measureLen); PROCESS_SLASH_IN_MEASUREMENT(measure, measureLen); + smlStrReplace(measure, measureLen); } STableMeta *pTableMeta = smlGetMeta(info, measure, measureLen); if (currElement->measureEscaped) { @@ -365,6 +366,7 @@ static int32_t smlParseColKv(SSmlHandle *info, char **sql, char 
*sqlEnd, SSmlLin measure = (char *)taosMemoryMalloc(currElement->measureLen); memcpy(measure, currElement->measure, currElement->measureLen); PROCESS_SLASH_IN_MEASUREMENT(measure, measureLen); + smlStrReplace(measure, measureLen); } STableMeta *pTableMeta = smlGetMeta(info, measure, measureLen); if (currElement->measureEscaped) { @@ -651,8 +653,8 @@ int32_t smlParseInfluxString(SSmlHandle *info, char *sql, char *sqlEnd, SSmlLine return TSDB_CODE_INVALID_TIMESTAMP; } // add ts to - SSmlKv kv = {.key = TS, - .keyLen = TS_LEN, + SSmlKv kv = {.key = tsSmlTsDefaultName, + .keyLen = strlen(tsSmlTsDefaultName), .type = TSDB_DATA_TYPE_TIMESTAMP, .i = ts, .length = (size_t)tDataTypes[TSDB_DATA_TYPE_TIMESTAMP].bytes, diff --git a/source/client/src/clientSmlTelnet.c b/source/client/src/clientSmlTelnet.c index b3f45a3107a239ba9f4b38161fd77a0635ca2879..c378f9b1c3a7198b49afb760a8fe8782d3765e32 100644 --- a/source/client/src/clientSmlTelnet.c +++ b/source/client/src/clientSmlTelnet.c @@ -260,8 +260,8 @@ int32_t smlParseTelnetString(SSmlHandle *info, char *sql, char *sqlEnd, SSmlLine smlBuildInvalidDataMsg(&info->msgBuf, "invalid timestamp", sql); return TSDB_CODE_INVALID_TIMESTAMP; } - SSmlKv kvTs = {.key = TS, - .keyLen = TS_LEN, + SSmlKv kvTs = {.key = tsSmlTsDefaultName, + .keyLen = strlen(tsSmlTsDefaultName), .type = TSDB_DATA_TYPE_TIMESTAMP, .i = ts, .length = (size_t)tDataTypes[TSDB_DATA_TYPE_TIMESTAMP].bytes}; diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 6a0c3171fb800411ead740f2f6407f5773dc343c..ae82be2470b62599626ce7bcb8f7d5d38c7dfd12 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -139,7 +139,8 @@ enum { typedef struct SVgOffsetInfo { STqOffsetVal committedOffset; - STqOffsetVal currentOffset; + STqOffsetVal endOffset; // the last version in TAOS_RES + 1 + STqOffsetVal beginOffset; // the first version in TAOS_RES int64_t walVerBegin; int64_t walVerEnd; } SVgOffsetInfo; @@ -214,6 +215,17 @@ typedef 
struct SMqVgCommon { int32_t code; } SMqVgCommon; +typedef struct SMqSeekParam { + tsem_t sem; + int32_t code; +} SMqSeekParam; + +typedef struct SMqCommittedParam { + tsem_t sem; + int32_t code; + SMqVgOffset vgOffset; +} SMqCommittedParam; + typedef struct SMqVgWalInfoParam { int32_t vgId; int32_t epoch; @@ -235,7 +247,7 @@ typedef struct { typedef struct { SMqCommitCbParamSet* params; - SMqVgOffset* pOffset; +// SMqVgOffset* pOffset; char topicName[TSDB_TOPIC_FNAME_LEN]; int32_t vgId; tmq_t* pTmq; @@ -249,8 +261,7 @@ typedef struct SSyncCommitInfo { static int32_t doAskEp(tmq_t* tmq); static int32_t makeTopicVgroupKey(char* dst, const char* topicName, int32_t vg); static int32_t tmqCommitDone(SMqCommitCbParamSet* pParamSet); -static int32_t doSendCommitMsg(tmq_t* tmq, SMqClientVg* pVg, const char* pTopicName, SMqCommitCbParamSet* pParamSet, - int32_t index, int32_t totalVgroups, int32_t type); +static int32_t doSendCommitMsg(tmq_t* tmq, int32_t vgId, SEpSet* epSet, STqOffsetVal* offset, const char* pTopicName, SMqCommitCbParamSet* pParamSet); static void commitRspCountDown(SMqCommitCbParamSet* pParamSet, int64_t consumerId, const char* pTopic, int32_t vgId); static void asyncAskEp(tmq_t* pTmq, __tmq_askep_fn_t askEpFn, void* param); static void addToQueueCallbackFn(tmq_t* pTmq, int32_t code, SDataBuf* pDataBuf, void* param); @@ -287,6 +298,9 @@ void tmq_conf_destroy(tmq_conf_t* conf) { } tmq_conf_res_t tmq_conf_set(tmq_conf_t* conf, const char* key, const char* value) { + if (conf == NULL || key == NULL || value == NULL){ + return TMQ_CONF_INVALID; + } if (strcasecmp(key, "group.id") == 0) { tstrncpy(conf->groupId, value, TSDB_CGROUP_LEN); return TMQ_CONF_OK; @@ -401,6 +415,7 @@ tmq_conf_res_t tmq_conf_set(tmq_conf_t* conf, const char* key, const char* value tmq_list_t* tmq_list_new() { return (tmq_list_t*)taosArrayInit(0, sizeof(void*)); } int32_t tmq_list_append(tmq_list_t* list, const char* src) { + if(list == NULL) return -1; SArray* container = 
&list->container; if (src == NULL || src[0] == 0) return -1; char* topic = taosStrdup(src); @@ -409,84 +424,28 @@ int32_t tmq_list_append(tmq_list_t* list, const char* src) { } void tmq_list_destroy(tmq_list_t* list) { + if(list == NULL) return; SArray* container = &list->container; taosArrayDestroyP(container, taosMemoryFree); } int32_t tmq_list_get_size(const tmq_list_t* list) { + if(list == NULL) return -1; const SArray* container = &list->container; return taosArrayGetSize(container); } char** tmq_list_to_c_array(const tmq_list_t* list) { + if(list == NULL) return NULL; const SArray* container = &list->container; return container->pData; } -//static SMqClientVg* foundClientVg(SArray* pTopicList, const char* pName, int32_t vgId, int32_t* index, -// int32_t* numOfVgroups) { -// int32_t numOfTopics = taosArrayGetSize(pTopicList); -// *index = -1; -// *numOfVgroups = 0; -// -// for (int32_t i = 0; i < numOfTopics; ++i) { -// SMqClientTopic* pTopic = taosArrayGet(pTopicList, i); -// if (strcmp(pTopic->topicName, pName) != 0) { -// continue; -// } -// -// *numOfVgroups = taosArrayGetSize(pTopic->vgs); -// for (int32_t j = 0; j < (*numOfVgroups); ++j) { -// SMqClientVg* pClientVg = taosArrayGet(pTopic->vgs, j); -// if (pClientVg->vgId == vgId) { -// *index = j; -// return pClientVg; -// } -// } -// } -// -// return NULL; -//} - -// Two problems do not need to be addressed here -// 1. update to of epset. the response of poll request will automatically handle this problem -// 2. commit failure. This one needs to be resolved. 
static int32_t tmqCommitCb(void* param, SDataBuf* pBuf, int32_t code) { SMqCommitCbParam* pParam = (SMqCommitCbParam*)param; SMqCommitCbParamSet* pParamSet = (SMqCommitCbParamSet*)pParam->params; - // if (code != TSDB_CODE_SUCCESS) { // if commit offset failed, let's try again - // taosThreadMutexLock(&pParam->pTmq->lock); - // int32_t numOfVgroups, index; - // SMqClientVg* pVg = foundClientVg(pParam->pTmq->clientTopics, pParam->topicName, pParam->vgId, &index, - // &numOfVgroups); if (pVg == NULL) { - // tscDebug("consumer:0x%" PRIx64 - // " subKey:%s vgId:%d commit failed, code:%s has been transferred to other consumer, no need retry - // ordinal:%d/%d", pParam->pTmq->consumerId, pParam->pOffset->subKey, pParam->vgId, tstrerror(code), - // index + 1, numOfVgroups); - // } else { // let's retry the commit - // int32_t code1 = doSendCommitMsg(pParam->pTmq, pVg, pParam->topicName, pParamSet, index, numOfVgroups); - // if (code1 != TSDB_CODE_SUCCESS) { // retry failed. - // tscError("consumer:0x%" PRIx64 " topic:%s vgId:%d offset:%" PRId64 - // " retry failed, ignore this commit. 
code:%s ordinal:%d/%d", - // pParam->pTmq->consumerId, pParam->topicName, pVg->vgId, pVg->offsetInfo.committedOffset.version, - // tstrerror(terrno), index + 1, numOfVgroups); - // } - // } - // - // taosThreadMutexUnlock(&pParam->pTmq->lock); - // - // taosMemoryFree(pParam->pOffset); - // taosMemoryFree(pBuf->pData); - // taosMemoryFree(pBuf->pEpSet); - // - // commitRspCountDown(pParamSet, pParam->pTmq->consumerId, pParam->topicName, pParam->vgId); - // return 0; - // } - // - // // todo replace the pTmq with refId - - taosMemoryFree(pParam->pOffset); +// taosMemoryFree(pParam->pOffset); taosMemoryFree(pBuf->pData); taosMemoryFree(pBuf->pEpSet); @@ -494,54 +453,48 @@ static int32_t tmqCommitCb(void* param, SDataBuf* pBuf, int32_t code) { return 0; } -static int32_t doSendCommitMsg(tmq_t* tmq, SMqClientVg* pVg, const char* pTopicName, SMqCommitCbParamSet* pParamSet, - int32_t index, int32_t totalVgroups, int32_t type) { - SMqVgOffset* pOffset = taosMemoryCalloc(1, sizeof(SMqVgOffset)); - if (pOffset == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } +static int32_t doSendCommitMsg(tmq_t* tmq, int32_t vgId, SEpSet* epSet, STqOffsetVal* offset, const char* pTopicName, SMqCommitCbParamSet* pParamSet) { + SMqVgOffset pOffset = {0}; - pOffset->consumerId = tmq->consumerId; - pOffset->offset.val = pVg->offsetInfo.currentOffset; + pOffset.consumerId = tmq->consumerId; + pOffset.offset.val = *offset; int32_t groupLen = strlen(tmq->groupId); - memcpy(pOffset->offset.subKey, tmq->groupId, groupLen); - pOffset->offset.subKey[groupLen] = TMQ_SEPARATOR; - strcpy(pOffset->offset.subKey + groupLen + 1, pTopicName); + memcpy(pOffset.offset.subKey, tmq->groupId, groupLen); + pOffset.offset.subKey[groupLen] = TMQ_SEPARATOR; + strcpy(pOffset.offset.subKey + groupLen + 1, pTopicName); int32_t len = 0; int32_t code = 0; - tEncodeSize(tEncodeMqVgOffset, pOffset, len, code); + tEncodeSize(tEncodeMqVgOffset, &pOffset, len, code); if (code < 0) { return TSDB_CODE_INVALID_PARA; } void* buf 
= taosMemoryCalloc(1, sizeof(SMsgHead) + len); if (buf == NULL) { - taosMemoryFree(pOffset); return TSDB_CODE_OUT_OF_MEMORY; } - ((SMsgHead*)buf)->vgId = htonl(pVg->vgId); + ((SMsgHead*)buf)->vgId = htonl(vgId); void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); SEncoder encoder; tEncoderInit(&encoder, abuf, len); - tEncodeMqVgOffset(&encoder, pOffset); + tEncodeMqVgOffset(&encoder, &pOffset); tEncoderClear(&encoder); // build param SMqCommitCbParam* pParam = taosMemoryCalloc(1, sizeof(SMqCommitCbParam)); if (pParam == NULL) { - taosMemoryFree(pOffset); taosMemoryFree(buf); return TSDB_CODE_OUT_OF_MEMORY; } pParam->params = pParamSet; - pParam->pOffset = pOffset; - pParam->vgId = pVg->vgId; +// pParam->pOffset = pOffset; + pParam->vgId = vgId; pParam->pTmq = tmq; tstrncpy(pParam->topicName, pTopicName, tListLen(pParam->topicName)); @@ -549,7 +502,6 @@ static int32_t doSendCommitMsg(tmq_t* tmq, SMqClientVg* pVg, const char* pTopicN // build send info SMsgSendInfo* pMsgSendInfo = taosMemoryCalloc(1, sizeof(SMsgSendInfo)); if (pMsgSendInfo == NULL) { - taosMemoryFree(pOffset); taosMemoryFree(buf); taosMemoryFree(pParam); return TSDB_CODE_OUT_OF_MEMORY; @@ -562,25 +514,16 @@ static int32_t doSendCommitMsg(tmq_t* tmq, SMqClientVg* pVg, const char* pTopicN pMsgSendInfo->param = pParam; pMsgSendInfo->paramFreeFp = taosMemoryFree; pMsgSendInfo->fp = tmqCommitCb; - pMsgSendInfo->msgType = type; + pMsgSendInfo->msgType = TDMT_VND_TMQ_COMMIT_OFFSET; atomic_add_fetch_32(&pParamSet->waitingRspNum, 1); atomic_add_fetch_32(&pParamSet->totalRspNum, 1); - SEp* pEp = GET_ACTIVE_EP(&pVg->epSet); - char offsetBuf[TSDB_OFFSET_LEN] = {0}; - tFormatOffset(offsetBuf, tListLen(offsetBuf), &pOffset->offset.val); + SEp* pEp = GET_ACTIVE_EP(epSet); - char commitBuf[TSDB_OFFSET_LEN] = {0}; - tFormatOffset(commitBuf, tListLen(commitBuf), &pVg->offsetInfo.committedOffset); - tscInfo("consumer:0x%" PRIx64 " topic:%s on vgId:%d send offset:%s prev:%s, ep:%s:%d, ordinal:%d/%d, req:0x%" PRIx64, - 
tmq->consumerId, pOffset->offset.subKey, pVg->vgId, offsetBuf, commitBuf, pEp->fqdn, pEp->port, index + 1, - totalVgroups, pMsgSendInfo->requestId); int64_t transporterId = 0; - asyncSendMsgToServer(tmq->pTscObj->pAppInfo->pTransporter, &pVg->epSet, &transporterId, pMsgSendInfo); - - return TSDB_CODE_SUCCESS; + return asyncSendMsgToServer(tmq->pTscObj->pAppInfo->pTransporter, epSet, &transporterId, pMsgSendInfo); } static SMqClientTopic* getTopicByName(tmq_t* tmq, const char* pTopicName) { @@ -598,154 +541,188 @@ static SMqClientTopic* getTopicByName(tmq_t* tmq, const char* pTopicName) { return NULL; } -static void asyncCommitOffset(tmq_t* tmq, const TAOS_RES* pRes, int32_t type, tmq_commit_cb* pCommitFp, void* userParam) { - char* pTopicName = NULL; - int32_t vgId = 0; - int32_t code = 0; - - if (pRes == NULL || tmq == NULL) { - pCommitFp(tmq, TSDB_CODE_INVALID_PARA, userParam); - return; - } - - if (TD_RES_TMQ(pRes)) { - SMqRspObj* pRspObj = (SMqRspObj*)pRes; - pTopicName = pRspObj->topic; - vgId = pRspObj->vgId; - } else if (TD_RES_TMQ_META(pRes)) { - SMqMetaRspObj* pMetaRspObj = (SMqMetaRspObj*)pRes; - pTopicName = pMetaRspObj->topic; - vgId = pMetaRspObj->vgId; - } else if (TD_RES_TMQ_METADATA(pRes)) { - SMqTaosxRspObj* pRspObj = (SMqTaosxRspObj*)pRes; - pTopicName = pRspObj->topic; - vgId = pRspObj->vgId; - } else { - pCommitFp(tmq, TSDB_CODE_TMQ_INVALID_MSG, userParam); - return; - } - +static SMqCommitCbParamSet* prepareCommitCbParamSet(tmq_t* tmq, tmq_commit_cb* pCommitFp, void* userParam, int32_t rspNum){ SMqCommitCbParamSet* pParamSet = taosMemoryCalloc(1, sizeof(SMqCommitCbParamSet)); if (pParamSet == NULL) { - pCommitFp(tmq, TSDB_CODE_OUT_OF_MEMORY, userParam); - return; + return NULL; } pParamSet->refId = tmq->refId; pParamSet->epoch = tmq->epoch; pParamSet->callbackFn = pCommitFp; pParamSet->userParam = userParam; + pParamSet->waitingRspNum = rspNum; - taosRLockLatch(&tmq->lock); - int32_t numOfTopics = taosArrayGetSize(tmq->clientTopics); + return 
pParamSet; +} - tscDebug("consumer:0x%" PRIx64 " do manual commit offset for %s, vgId:%d", tmq->consumerId, pTopicName, vgId); + +static int32_t getClientVg(tmq_t* tmq, char* pTopicName, int32_t vgId, SMqClientVg** pVg){ SMqClientTopic* pTopic = getTopicByName(tmq, pTopicName); if (pTopic == NULL) { - tscWarn("consumer:0x%" PRIx64 " failed to find the specified topic:%s, total topics:%d", tmq->consumerId, - pTopicName, numOfTopics); - taosMemoryFree(pParamSet); - pCommitFp(tmq, TSDB_CODE_SUCCESS, userParam); - taosRUnLockLatch(&tmq->lock); - return; + tscError("consumer:0x%" PRIx64 " invalid topic name:%s", tmq->consumerId, pTopicName); + return TSDB_CODE_TMQ_INVALID_TOPIC; } - int32_t j = 0; - int32_t numOfVgroups = taosArrayGetSize(pTopic->vgs); - for (j = 0; j < numOfVgroups; j++) { - SMqClientVg* pVg = (SMqClientVg*)taosArrayGet(pTopic->vgs, j); - if (pVg->vgId == vgId) { + int32_t numOfVgs = taosArrayGetSize(pTopic->vgs); + for (int32_t i = 0; i < numOfVgs; ++i) { + SMqClientVg* pClientVg = taosArrayGet(pTopic->vgs, i); + if (pClientVg->vgId == vgId) { + *pVg = pClientVg; break; } } - if (j == numOfVgroups) { - tscWarn("consumer:0x%" PRIx64 " failed to find the specified vgId:%d, total Vgs:%d, topic:%s", tmq->consumerId, - vgId, numOfVgroups, pTopicName); - taosMemoryFree(pParamSet); - pCommitFp(tmq, TSDB_CODE_SUCCESS, userParam); - taosRUnLockLatch(&tmq->lock); - return; + return *pVg == NULL ? 
TSDB_CODE_TMQ_INVALID_VGID : TSDB_CODE_SUCCESS; +} + +static int32_t asyncCommitOffset(tmq_t* tmq, char* pTopicName, int32_t vgId, STqOffsetVal* offsetVal, tmq_commit_cb* pCommitFp, void* userParam) { + int32_t code = 0; + tscInfo("consumer:0x%" PRIx64 " do manual commit offset for %s, vgId:%d", tmq->consumerId, pTopicName, vgId); + taosRLockLatch(&tmq->lock); + SMqClientVg* pVg = NULL; + code = getClientVg(tmq, pTopicName, vgId, &pVg); + if(code != 0){ + goto end; + } + if (offsetVal->type <= 0) { + code = TSDB_CODE_TMQ_INVALID_MSG; + goto end; } + if (tOffsetEqual(offsetVal, &pVg->offsetInfo.committedOffset)){ + code = TSDB_CODE_TMQ_SAME_COMMITTED_VALUE; + goto end; + } + char offsetBuf[TSDB_OFFSET_LEN] = {0}; + tFormatOffset(offsetBuf, tListLen(offsetBuf), offsetVal); - SMqClientVg* pVg = (SMqClientVg*)taosArrayGet(pTopic->vgs, j); - if (pVg->offsetInfo.currentOffset.type > 0 && !tOffsetEqual(&pVg->offsetInfo.currentOffset, &pVg->offsetInfo.committedOffset)) { - code = doSendCommitMsg(tmq, pVg, pTopic->topicName, pParamSet, j, numOfVgroups, type); + char commitBuf[TSDB_OFFSET_LEN] = {0}; + tFormatOffset(commitBuf, tListLen(commitBuf), &pVg->offsetInfo.committedOffset); - // failed to commit, callback user function directly. - if (code != TSDB_CODE_SUCCESS) { - taosMemoryFree(pParamSet); - pCommitFp(tmq, code, userParam); - } - // update the offset value. - pVg->offsetInfo.committedOffset = pVg->offsetInfo.currentOffset; - } else { // do not perform commit, callback user function directly. 
+ SMqCommitCbParamSet* pParamSet = prepareCommitCbParamSet(tmq, pCommitFp, userParam, 0); + if (pParamSet == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto end; + } + code = doSendCommitMsg(tmq, pVg->vgId, &pVg->epSet, offsetVal, pTopicName, pParamSet); + if (code != TSDB_CODE_SUCCESS) { + tscError("consumer:0x%" PRIx64 " topic:%s on vgId:%d end commit msg failed, send offset:%s committed:%s, code:%s", + tmq->consumerId, pTopicName, pVg->vgId, offsetBuf, commitBuf, tstrerror(terrno)); taosMemoryFree(pParamSet); - pCommitFp(tmq, code, userParam); + goto end; } + + tscInfo("consumer:0x%" PRIx64 " topic:%s on vgId:%d send commit msg success, send offset:%s committed:%s", + tmq->consumerId, pTopicName, pVg->vgId, offsetBuf, commitBuf); + pVg->offsetInfo.committedOffset = *offsetVal; + +end: taosRUnLockLatch(&tmq->lock); + return code; } -static void asyncCommitAllOffsets(tmq_t* tmq, tmq_commit_cb* pCommitFp, void* userParam) { - SMqCommitCbParamSet* pParamSet = taosMemoryCalloc(1, sizeof(SMqCommitCbParamSet)); - if (pParamSet == NULL) { - pCommitFp(tmq, TSDB_CODE_OUT_OF_MEMORY, userParam); - return; +static void asyncCommitFromResult(tmq_t* tmq, const TAOS_RES* pRes, tmq_commit_cb* pCommitFp, void* userParam){ + char* pTopicName = NULL; + int32_t vgId = 0; + STqOffsetVal offsetVal = {0}; + int32_t code = 0; + + if (pRes == NULL || tmq == NULL) { + code = TSDB_CODE_INVALID_PARA; + goto end; } - pParamSet->refId = tmq->refId; - pParamSet->epoch = tmq->epoch; - pParamSet->callbackFn = pCommitFp; - pParamSet->userParam = userParam; + if (TD_RES_TMQ(pRes)) { + SMqRspObj* pRspObj = (SMqRspObj*)pRes; + pTopicName = pRspObj->topic; + vgId = pRspObj->vgId; + offsetVal = pRspObj->rsp.rspOffset; + } else if (TD_RES_TMQ_META(pRes)) { + SMqMetaRspObj* pMetaRspObj = (SMqMetaRspObj*)pRes; + pTopicName = pMetaRspObj->topic; + vgId = pMetaRspObj->vgId; + offsetVal = pMetaRspObj->metaRsp.rspOffset; + } else if (TD_RES_TMQ_METADATA(pRes)) { + SMqTaosxRspObj* pRspObj = 
(SMqTaosxRspObj*)pRes; + pTopicName = pRspObj->topic; + vgId = pRspObj->vgId; + offsetVal = pRspObj->rsp.rspOffset; + } else { + code = TSDB_CODE_TMQ_INVALID_MSG; + goto end; + } + + code = asyncCommitOffset(tmq, pTopicName, vgId, &offsetVal, pCommitFp, userParam); + +end: + if(code != TSDB_CODE_SUCCESS && pCommitFp != NULL){ + if(code == TSDB_CODE_TMQ_SAME_COMMITTED_VALUE) code = TSDB_CODE_SUCCESS; + pCommitFp(tmq, code, userParam); + } +} +static void asyncCommitAllOffsets(tmq_t* tmq, tmq_commit_cb* pCommitFp, void* userParam) { + int32_t code = 0; // init as 1 to prevent concurrency issue - pParamSet->waitingRspNum = 1; + SMqCommitCbParamSet* pParamSet = prepareCommitCbParamSet(tmq, pCommitFp, userParam, 1); + if (pParamSet == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto end; + } taosRLockLatch(&tmq->lock); int32_t numOfTopics = taosArrayGetSize(tmq->clientTopics); - tscDebug("consumer:0x%" PRIx64 " start to commit offset for %d topics", tmq->consumerId, numOfTopics); + tscInfo("consumer:0x%" PRIx64 " start to commit offset for %d topics", tmq->consumerId, numOfTopics); for (int32_t i = 0; i < numOfTopics; i++) { SMqClientTopic* pTopic = taosArrayGet(tmq->clientTopics, i); int32_t numOfVgroups = taosArrayGetSize(pTopic->vgs); - tscDebug("consumer:0x%" PRIx64 " commit offset for topics:%s, numOfVgs:%d", tmq->consumerId, pTopic->topicName, - numOfVgroups); + tscInfo("consumer:0x%" PRIx64 " commit offset for topics:%s, numOfVgs:%d", tmq->consumerId, pTopic->topicName, numOfVgroups); for (int32_t j = 0; j < numOfVgroups; j++) { SMqClientVg* pVg = taosArrayGet(pTopic->vgs, j); - if (pVg->offsetInfo.currentOffset.type > 0 && !tOffsetEqual(&pVg->offsetInfo.currentOffset, &pVg->offsetInfo.committedOffset)) { - int32_t code = doSendCommitMsg(tmq, pVg, pTopic->topicName, pParamSet, j, numOfVgroups, TDMT_VND_TMQ_COMMIT_OFFSET); + if (pVg->offsetInfo.endOffset.type > 0 && !tOffsetEqual(&pVg->offsetInfo.endOffset, &pVg->offsetInfo.committedOffset)) { + char 
offsetBuf[TSDB_OFFSET_LEN] = {0}; + tFormatOffset(offsetBuf, tListLen(offsetBuf), &pVg->offsetInfo.endOffset); + + char commitBuf[TSDB_OFFSET_LEN] = {0}; + tFormatOffset(commitBuf, tListLen(commitBuf), &pVg->offsetInfo.committedOffset); + + code = doSendCommitMsg(tmq, pVg->vgId, &pVg->epSet, &pVg->offsetInfo.endOffset, pTopic->topicName, pParamSet); if (code != TSDB_CODE_SUCCESS) { - tscError("consumer:0x%" PRIx64 " topic:%s vgId:%d offset:%" PRId64 " failed, code:%s ordinal:%d/%d", - tmq->consumerId, pTopic->topicName, pVg->vgId, pVg->offsetInfo.committedOffset.version, tstrerror(terrno), - j + 1, numOfVgroups); + tscError("consumer:0x%" PRIx64 " topic:%s on vgId:%d end commit msg failed, send offset:%s committed:%s, code:%s ordinal:%d/%d", + tmq->consumerId, pTopic->topicName, pVg->vgId, offsetBuf, commitBuf, tstrerror(terrno), j + 1, numOfVgroups); continue; } - // update the offset value. - pVg->offsetInfo.committedOffset = pVg->offsetInfo.currentOffset; + tscInfo("consumer:0x%" PRIx64 " topic:%s on vgId:%d send commit msg success, send offset:%s committed:%s, ordinal:%d/%d", + tmq->consumerId, pTopic->topicName, pVg->vgId, offsetBuf, commitBuf, j + 1, numOfVgroups); + pVg->offsetInfo.committedOffset = pVg->offsetInfo.endOffset; } else { - tscDebug("consumer:0x%" PRIx64 " topic:%s vgId:%d, no commit, current:%" PRId64 ", ordinal:%d/%d", - tmq->consumerId, pTopic->topicName, pVg->vgId, pVg->offsetInfo.currentOffset.version, j + 1, numOfVgroups); + tscInfo("consumer:0x%" PRIx64 " topic:%s vgId:%d, no commit, current:%" PRId64 ", ordinal:%d/%d", + tmq->consumerId, pTopic->topicName, pVg->vgId, pVg->offsetInfo.endOffset.version, j + 1, numOfVgroups); } } } taosRUnLockLatch(&tmq->lock); - tscDebug("consumer:0x%" PRIx64 " total commit:%d for %d topics", tmq->consumerId, pParamSet->waitingRspNum - 1, - numOfTopics); + tscInfo("consumer:0x%" PRIx64 " total commit:%d for %d topics", tmq->consumerId, pParamSet->waitingRspNum - 1, numOfTopics); - // no request is sent - 
if (pParamSet->totalRspNum == 0) { - taosMemoryFree(pParamSet); - pCommitFp(tmq, TSDB_CODE_SUCCESS, userParam); + // request is sent + if (pParamSet->totalRspNum != 0) { + // count down since waiting rsp num init as 1 + commitRspCountDown(pParamSet, tmq->consumerId, "", 0); return; } - // count down since waiting rsp num init as 1 - commitRspCountDown(pParamSet, tmq->consumerId, "", 0); +end: + taosMemoryFree(pParamSet); + if(pCommitFp != NULL) { + pCommitFp(tmq, code, userParam); + } + return; } static void generateTimedTask(int64_t refId, int32_t type) { @@ -821,7 +798,7 @@ void tmqSendHbReq(void* param, void* tmrId) { OffsetRows* offRows = taosArrayReserve(data->offsetRows, 1); offRows->vgId = pVg->vgId; offRows->rows = pVg->numOfRows; - offRows->offset = pVg->offsetInfo.currentOffset; + offRows->offset = pVg->offsetInfo.beginOffset; char buf[TSDB_OFFSET_LEN] = {0}; tFormatOffset(buf, TSDB_OFFSET_LEN, &offRows->offset); tscInfo("consumer:0x%" PRIx64 ",report offset: vgId:%d, offset:%s, rows:%"PRId64, tmq->consumerId, offRows->vgId, buf, offRows->rows); @@ -993,6 +970,7 @@ int32_t tmqSubscribeCb(void* param, SDataBuf* pMsg, int32_t code) { } int32_t tmq_subscription(tmq_t* tmq, tmq_list_t** topics) { + if(tmq == NULL) return TSDB_CODE_INVALID_PARA; if (*topics == NULL) { *topics = tmq_list_new(); } @@ -1006,6 +984,7 @@ int32_t tmq_subscription(tmq_t* tmq, tmq_list_t** topics) { } int32_t tmq_unsubscribe(tmq_t* tmq) { + if(tmq == NULL) return TSDB_CODE_INVALID_PARA; if (tmq->autoCommit) { int32_t rsp = tmq_commit_sync(tmq, NULL); if (rsp != 0) { @@ -1076,6 +1055,7 @@ static void tmqMgmtInit(void) { } tmq_t* tmq_consumer_new(tmq_conf_t* conf, char* errstr, int32_t errstrLen) { + if(conf == NULL) return NULL; taosThreadOnce(&tmqInit, tmqMgmtInit); if (tmqInitRes != 0) { terrno = tmqInitRes; @@ -1169,6 +1149,7 @@ _failed: } int32_t tmq_subscribe(tmq_t* tmq, const tmq_list_t* topic_list) { + if(tmq == NULL) return TSDB_CODE_INVALID_PARA; const int32_t MAX_RETRY_COUNT 
= 120 * 2; // let's wait for 2 mins at most const SArray* container = &topic_list->container; int32_t sz = taosArrayGetSize(container); @@ -1293,6 +1274,7 @@ FAIL: } void tmq_conf_set_auto_commit_cb(tmq_conf_t* conf, tmq_commit_cb* cb, void* param) { + if(conf == NULL) return; conf->commitCb = cb; conf->commitCbUserParam = param; } @@ -1479,6 +1461,7 @@ CREATE_MSG_FAIL: typedef struct SVgroupSaveInfo { STqOffsetVal currentOffset; STqOffsetVal commitOffset; + STqOffsetVal seekOffset; int64_t numOfRows; } SVgroupSaveInfo; @@ -1516,8 +1499,9 @@ static void initClientTopicFromRsp(SMqClientTopic* pTopic, SMqSubTopicEp* pTopic .numOfRows = pInfo ? pInfo->numOfRows : 0, }; - clientVg.offsetInfo.currentOffset = pInfo ? pInfo->currentOffset : offsetNew; + clientVg.offsetInfo.endOffset = pInfo ? pInfo->currentOffset : offsetNew; clientVg.offsetInfo.committedOffset = pInfo ? pInfo->commitOffset : offsetNew; + clientVg.offsetInfo.beginOffset = pInfo ? pInfo->seekOffset : offsetNew; clientVg.offsetInfo.walVerBegin = -1; clientVg.offsetInfo.walVerEnd = -1; clientVg.seekUpdated = false; @@ -1573,11 +1557,11 @@ static bool doUpdateLocalEp(tmq_t* tmq, int32_t epoch, const SMqAskEpRsp* pRsp) makeTopicVgroupKey(vgKey, pTopicCur->topicName, pVgCur->vgId); char buf[TSDB_OFFSET_LEN] = {0}; - tFormatOffset(buf, TSDB_OFFSET_LEN, &pVgCur->offsetInfo.currentOffset); + tFormatOffset(buf, TSDB_OFFSET_LEN, &pVgCur->offsetInfo.endOffset); tscInfo("consumer:0x%" PRIx64 ", epoch:%d vgId:%d vgKey:%s, offset:%s", tmq->consumerId, epoch, pVgCur->vgId, vgKey, buf); - SVgroupSaveInfo info = {.currentOffset = pVgCur->offsetInfo.currentOffset, .commitOffset = pVgCur->offsetInfo.committedOffset, .numOfRows = pVgCur->numOfRows}; + SVgroupSaveInfo info = {.currentOffset = pVgCur->offsetInfo.endOffset, .seekOffset = pVgCur->offsetInfo.beginOffset, .commitOffset = pVgCur->offsetInfo.committedOffset, .numOfRows = pVgCur->numOfRows}; taosHashPut(pVgOffsetHashMap, vgKey, strlen(vgKey), &info, 
sizeof(SVgroupSaveInfo)); } } @@ -1674,7 +1658,7 @@ void tmqBuildConsumeReqImpl(SMqPollReq* pReq, tmq_t* tmq, int64_t timeout, SMqCl pReq->consumerId = tmq->consumerId; pReq->timeout = timeout; pReq->epoch = tmq->epoch; - pReq->reqOffset = pVg->offsetInfo.currentOffset; + pReq->reqOffset = pVg->offsetInfo.endOffset; pReq->head.vgId = pVg->vgId; pReq->useSnapshot = tmq->useSnapshot; pReq->reqId = generateRequestId(); @@ -1801,7 +1785,7 @@ static int32_t doTmqPollImpl(tmq_t* pTmq, SMqClientTopic* pTopic, SMqClientVg* p int64_t transporterId = 0; char offsetFormatBuf[TSDB_OFFSET_LEN] = {0}; - tFormatOffset(offsetFormatBuf, tListLen(offsetFormatBuf), &pVg->offsetInfo.currentOffset); + tFormatOffset(offsetFormatBuf, tListLen(offsetFormatBuf), &pVg->offsetInfo.endOffset); tscDebug("consumer:0x%" PRIx64 " send poll to %s vgId:%d, epoch %d, req:%s, reqId:0x%" PRIx64, pTmq->consumerId, pTopic->topicName, pVg->vgId, pTmq->epoch, offsetFormatBuf, req.reqId); @@ -1879,10 +1863,11 @@ static int32_t tmqHandleNoPollRsp(tmq_t* tmq, SMqRspWrapper* rspWrapper, bool* p return 0; } -static void updateVgInfo(SMqClientVg* pVg, STqOffsetVal* offset, int64_t sver, int64_t ever, int64_t consumerId){ +static void updateVgInfo(SMqClientVg* pVg, STqOffsetVal* reqOffset, STqOffsetVal* rspOffset, int64_t sver, int64_t ever, int64_t consumerId){ if (!pVg->seekUpdated) { tscDebug("consumer:0x%" PRIx64" local offset is update, since seekupdate not set", consumerId); - pVg->offsetInfo.currentOffset = *offset; + pVg->offsetInfo.beginOffset = *reqOffset; + pVg->offsetInfo.endOffset = *rspOffset; } else { tscDebug("consumer:0x%" PRIx64" local offset is NOT update, since seekupdate is set", consumerId); } @@ -1892,7 +1877,7 @@ static void updateVgInfo(SMqClientVg* pVg, STqOffsetVal* offset, int64_t sver, i // update the valid wal version range pVg->offsetInfo.walVerBegin = sver; - pVg->offsetInfo.walVerEnd = ever; + pVg->offsetInfo.walVerEnd = ever + 1; // pVg->receivedInfoFromVnode = true; } @@ 
-1944,7 +1929,7 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { pVg->epSet = *pollRspWrapper->pEpset; } - updateVgInfo(pVg, &pDataRsp->rspOffset, pDataRsp->head.walsver, pDataRsp->head.walever, tmq->consumerId); + updateVgInfo(pVg, &pDataRsp->reqOffset, &pDataRsp->rspOffset, pDataRsp->head.walsver, pDataRsp->head.walever, tmq->consumerId); char buf[TSDB_OFFSET_LEN] = {0}; tFormatOffset(buf, TSDB_OFFSET_LEN, &pDataRsp->rspOffset); @@ -1970,7 +1955,7 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { } taosWUnLockLatch(&tmq->lock); } else { - tscDebug("consumer:0x%" PRIx64 " vgId:%d msg discard since epoch mismatch: msg epoch %d, consumer epoch %d", + tscInfo("consumer:0x%" PRIx64 " vgId:%d msg discard since epoch mismatch: msg epoch %d, consumer epoch %d", tmq->consumerId, pollRspWrapper->vgId, pDataRsp->head.epoch, consumerEpoch); pRspWrapper = tmqFreeRspWrapper(pRspWrapper); taosFreeQitem(pollRspWrapper); @@ -1994,14 +1979,14 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { return NULL; } - updateVgInfo(pVg, &pollRspWrapper->metaRsp.rspOffset, pollRspWrapper->metaRsp.head.walsver, pollRspWrapper->metaRsp.head.walever, tmq->consumerId); + updateVgInfo(pVg, &pollRspWrapper->metaRsp.rspOffset, &pollRspWrapper->metaRsp.rspOffset, pollRspWrapper->metaRsp.head.walsver, pollRspWrapper->metaRsp.head.walever, tmq->consumerId); // build rsp SMqMetaRspObj* pRsp = tmqBuildMetaRspFromWrapper(pollRspWrapper); taosFreeQitem(pollRspWrapper); taosWUnLockLatch(&tmq->lock); return pRsp; } else { - tscDebug("consumer:0x%" PRIx64 " vgId:%d msg discard since epoch mismatch: msg epoch %d, consumer epoch %d", + tscInfo("consumer:0x%" PRIx64 " vgId:%d msg discard since epoch mismatch: msg epoch %d, consumer epoch %d", tmq->consumerId, pollRspWrapper->vgId, pollRspWrapper->metaRsp.head.epoch, consumerEpoch); pRspWrapper = tmqFreeRspWrapper(pRspWrapper); taosFreeQitem(pollRspWrapper); @@ -2022,7 
+2007,7 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { return NULL; } - updateVgInfo(pVg, &pollRspWrapper->taosxRsp.rspOffset, pollRspWrapper->taosxRsp.head.walsver, pollRspWrapper->taosxRsp.head.walever, tmq->consumerId); + updateVgInfo(pVg, &pollRspWrapper->taosxRsp.reqOffset, &pollRspWrapper->taosxRsp.rspOffset, pollRspWrapper->taosxRsp.head.walsver, pollRspWrapper->taosxRsp.head.walever, tmq->consumerId); if (pollRspWrapper->taosxRsp.blockNum == 0) { tscDebug("consumer:0x%" PRIx64 " taosx empty block received, vgId:%d, vg total:%" PRId64 ", reqId:0x%" PRIx64, @@ -2047,18 +2032,18 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { tmq->totalRows += numOfRows; - char buf[TSDB_OFFSET_LEN] = {0}; - tFormatOffset(buf, TSDB_OFFSET_LEN, &pVg->offsetInfo.currentOffset); - tscDebug("consumer:0x%" PRIx64 " process taosx poll rsp, vgId:%d, offset:%s, blocks:%d, rows:%" PRId64 - ", vg total:%" PRId64 ", total:%" PRId64 ", reqId:0x%" PRIx64, - tmq->consumerId, pVg->vgId, buf, pollRspWrapper->dataRsp.blockNum, numOfRows, pVg->numOfRows, - tmq->totalRows, pollRspWrapper->reqId); + char buf[TSDB_OFFSET_LEN] = {0}; + tFormatOffset(buf, TSDB_OFFSET_LEN, &pVg->offsetInfo.endOffset); + tscDebug("consumer:0x%" PRIx64 " process taosx poll rsp, vgId:%d, offset:%s, blocks:%d, rows:%" PRId64 + ", vg total:%" PRId64 ", total:%" PRId64 ", reqId:0x%" PRIx64, + tmq->consumerId, pVg->vgId, buf, pollRspWrapper->dataRsp.blockNum, numOfRows, pVg->numOfRows, + tmq->totalRows, pollRspWrapper->reqId); taosFreeQitem(pollRspWrapper); taosWUnLockLatch(&tmq->lock); return pRsp; } else { - tscDebug("consumer:0x%" PRIx64 " vgId:%d msg discard since epoch mismatch: msg epoch %d, consumer epoch %d", + tscInfo("consumer:0x%" PRIx64 " vgId:%d msg discard since epoch mismatch: msg epoch %d, consumer epoch %d", tmq->consumerId, pollRspWrapper->vgId, pollRspWrapper->taosxRsp.head.epoch, consumerEpoch); pRspWrapper = 
tmqFreeRspWrapper(pRspWrapper); taosFreeQitem(pollRspWrapper); @@ -2078,6 +2063,8 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { } TAOS_RES* tmq_consumer_poll(tmq_t* tmq, int64_t timeout) { + if(tmq == NULL) return NULL; + void* rspObj; int64_t startTime = taosGetTimestampMs(); @@ -2157,6 +2144,8 @@ static void displayConsumeStatistics(tmq_t* pTmq) { } int32_t tmq_consumer_close(tmq_t* tmq) { + if(tmq == NULL) return TSDB_CODE_INVALID_PARA; + tscInfo("consumer:0x%" PRIx64 " start to close consumer, status:%d", tmq->consumerId, tmq->status); displayConsumeStatistics(tmq); @@ -2202,6 +2191,9 @@ const char* tmq_err2str(int32_t err) { } tmq_res_t tmq_get_res_type(TAOS_RES* res) { + if (res == NULL){ + return TMQ_RES_INVALID; + } if (TD_RES_TMQ(res)) { return TMQ_RES_DATA; } else if (TD_RES_TMQ_META(res)) { @@ -2214,6 +2206,9 @@ tmq_res_t tmq_get_res_type(TAOS_RES* res) { } const char* tmq_get_topic_name(TAOS_RES* res) { + if (res == NULL){ + return NULL; + } if (TD_RES_TMQ(res)) { SMqRspObj* pRspObj = (SMqRspObj*)res; return strchr(pRspObj->topic, '.') + 1; @@ -2229,6 +2224,10 @@ const char* tmq_get_topic_name(TAOS_RES* res) { } const char* tmq_get_db_name(TAOS_RES* res) { + if (res == NULL){ + return NULL; + } + if (TD_RES_TMQ(res)) { SMqRspObj* pRspObj = (SMqRspObj*)res; return strchr(pRspObj->db, '.') + 1; @@ -2244,6 +2243,9 @@ const char* tmq_get_db_name(TAOS_RES* res) { } int32_t tmq_get_vgroup_id(TAOS_RES* res) { + if (res == NULL){ + return -1; + } if (TD_RES_TMQ(res)) { SMqRspObj* pRspObj = (SMqRspObj*)res; return pRspObj->vgId; @@ -2259,11 +2261,16 @@ int32_t tmq_get_vgroup_id(TAOS_RES* res) { } int64_t tmq_get_vgroup_offset(TAOS_RES* res) { + if (res == NULL){ + return TSDB_CODE_INVALID_PARA; + } if (TD_RES_TMQ(res)) { SMqRspObj* pRspObj = (SMqRspObj*) res; - STqOffsetVal* pOffset = &pRspObj->rsp.rspOffset; + STqOffsetVal* pOffset = &pRspObj->rsp.reqOffset; if (pOffset->type == TMQ_OFFSET__LOG) { - return 
pRspObj->rsp.rspOffset.version; + return pRspObj->rsp.reqOffset.version; + }else{ + tscError("invalid offset type:%d", pOffset->type); } } else if (TD_RES_TMQ_META(res)) { SMqMetaRspObj* pRspObj = (SMqMetaRspObj*)res; @@ -2272,16 +2279,21 @@ int64_t tmq_get_vgroup_offset(TAOS_RES* res) { } } else if (TD_RES_TMQ_METADATA(res)) { SMqTaosxRspObj* pRspObj = (SMqTaosxRspObj*) res; - if (pRspObj->rsp.rspOffset.type == TMQ_OFFSET__LOG) { - return pRspObj->rsp.rspOffset.version; + if (pRspObj->rsp.reqOffset.type == TMQ_OFFSET__LOG) { + return pRspObj->rsp.reqOffset.version; } + } else{ + tscError("invalid tmq type:%d", *(int8_t*)res); } // data from tsdb, no valid offset info - return -1; + return TSDB_CODE_TMQ_SNAPSHOT_ERROR; } const char* tmq_get_table_name(TAOS_RES* res) { + if (res == NULL){ + return NULL; + } if (TD_RES_TMQ(res)) { SMqRspObj* pRspObj = (SMqRspObj*)res; if (!pRspObj->rsp.withTbName || pRspObj->rsp.blockTbName == NULL || pRspObj->resIter < 0 || @@ -2301,10 +2313,17 @@ const char* tmq_get_table_name(TAOS_RES* res) { } void tmq_commit_async(tmq_t* tmq, const TAOS_RES* pRes, tmq_commit_cb* cb, void* param) { + if (tmq == NULL) { + tscError("invalid tmq handle, null"); + if(cb != NULL) { + cb(tmq, TSDB_CODE_INVALID_PARA, param); + } + return; + } if (pRes == NULL) { // here needs to commit all offsets. 
asyncCommitAllOffsets(tmq, cb, param); } else { // only commit one offset - asyncCommitOffset(tmq, pRes, TDMT_VND_TMQ_COMMIT_OFFSET, cb, param); + asyncCommitFromResult(tmq, pRes, cb, param); } } @@ -2315,6 +2334,11 @@ static void commitCallBackFn(tmq_t *UNUSED_PARAM(tmq), int32_t code, void* param } int32_t tmq_commit_sync(tmq_t* tmq, const TAOS_RES* pRes) { + if (tmq == NULL) { + tscError("invalid tmq handle, null"); + return TSDB_CODE_INVALID_PARA; + } + int32_t code = 0; SSyncCommitInfo* pInfo = taosMemoryMalloc(sizeof(SSyncCommitInfo)); @@ -2324,7 +2348,7 @@ int32_t tmq_commit_sync(tmq_t* tmq, const TAOS_RES* pRes) { if (pRes == NULL) { asyncCommitAllOffsets(tmq, commitCallBackFn, pInfo); } else { - asyncCommitOffset(tmq, pRes, TDMT_VND_TMQ_COMMIT_OFFSET, commitCallBackFn, pInfo); + asyncCommitFromResult(tmq, pRes, commitCallBackFn, pInfo); } tsem_wait(&pInfo->sem); @@ -2333,10 +2357,118 @@ int32_t tmq_commit_sync(tmq_t* tmq, const TAOS_RES* pRes) { tsem_destroy(&pInfo->sem); taosMemoryFree(pInfo); - tscDebug("consumer:0x%" PRIx64 " sync commit done, code:%s", tmq->consumerId, tstrerror(code)); + tscInfo("consumer:0x%" PRIx64 " sync res commit done, code:%s", tmq->consumerId, tstrerror(code)); return code; } +// wal range will be ok after calling tmq_get_topic_assignment or poll interface +static int32_t checkWalRange(SVgOffsetInfo* offset, int64_t value){ + if (offset->walVerBegin == -1 || offset->walVerEnd == -1) { + tscError("Assignment or poll interface need to be called first"); + return TSDB_CODE_TMQ_NEED_INITIALIZED; + } + + if (value != -1 && (value < offset->walVerBegin || value > offset->walVerEnd)) { + tscError("invalid seek params, offset:%" PRId64 ", valid range:[%" PRId64 ", %" PRId64 "]", value, offset->walVerBegin, offset->walVerEnd); + return TSDB_CODE_TMQ_VERSION_OUT_OF_RANGE; + } + + return 0; +} + +int32_t tmq_commit_offset_sync(tmq_t *tmq, const char *pTopicName, int32_t vgId, int64_t offset){ + if (tmq == NULL || pTopicName == NULL) { + 
tscError("invalid tmq handle, null"); + return TSDB_CODE_INVALID_PARA; + } + + int32_t accId = tmq->pTscObj->acctId; + char tname[TSDB_TOPIC_FNAME_LEN] = {0}; + sprintf(tname, "%d.%s", accId, pTopicName); + + taosWLockLatch(&tmq->lock); + SMqClientVg* pVg = NULL; + int32_t code = getClientVg(tmq, tname, vgId, &pVg); + if(code != 0){ + taosWUnLockLatch(&tmq->lock); + return code; + } + + SVgOffsetInfo* pOffsetInfo = &pVg->offsetInfo; + code = checkWalRange(pOffsetInfo, offset); + if (code != 0) { + taosWUnLockLatch(&tmq->lock); + return code; + } + taosWUnLockLatch(&tmq->lock); + + STqOffsetVal offsetVal = {.type = TMQ_OFFSET__LOG, .version = offset}; + + SSyncCommitInfo* pInfo = taosMemoryMalloc(sizeof(SSyncCommitInfo)); + if (pInfo == NULL) { + tscError("consumer:0x%"PRIx64" failed to prepare seek operation", tmq->consumerId); + return TSDB_CODE_OUT_OF_MEMORY; + } + + tsem_init(&pInfo->sem, 0, 0); + pInfo->code = 0; + + code = asyncCommitOffset(tmq, tname, vgId, &offsetVal, commitCallBackFn, pInfo); + if(code == 0){ + tsem_wait(&pInfo->sem); + code = pInfo->code; + } + + if(code == TSDB_CODE_TMQ_SAME_COMMITTED_VALUE) code = TSDB_CODE_SUCCESS; + tsem_destroy(&pInfo->sem); + taosMemoryFree(pInfo); + + tscInfo("consumer:0x%" PRIx64 " sync send commit to vgId:%d, offset:%" PRId64" code:%s", tmq->consumerId, vgId, offset, tstrerror(code)); + + return code; +} + +void tmq_commit_offset_async(tmq_t *tmq, const char *pTopicName, int32_t vgId, int64_t offset, tmq_commit_cb *cb, void *param){ + int32_t code = 0; + if (tmq == NULL || pTopicName == NULL) { + tscError("invalid tmq handle, null"); + code = TSDB_CODE_INVALID_PARA; + goto end; + } + + int32_t accId = tmq->pTscObj->acctId; + char tname[TSDB_TOPIC_FNAME_LEN] = {0}; + sprintf(tname, "%d.%s", accId, pTopicName); + + taosWLockLatch(&tmq->lock); + SMqClientVg* pVg = NULL; + code = getClientVg(tmq, tname, vgId, &pVg); + if(code != 0){ + taosWUnLockLatch(&tmq->lock); + goto end; + } + + SVgOffsetInfo* pOffsetInfo = 
&pVg->offsetInfo; + code = checkWalRange(pOffsetInfo, offset); + if (code != 0) { + taosWUnLockLatch(&tmq->lock); + goto end; + } + taosWUnLockLatch(&tmq->lock); + + STqOffsetVal offsetVal = {.type = TMQ_OFFSET__LOG, .version = offset}; + + code = asyncCommitOffset(tmq, tname, vgId, &offsetVal, cb, param); + + tscInfo("consumer:0x%" PRIx64 " async send commit to vgId:%d, offset:%" PRId64" code:%s", tmq->consumerId, vgId, offset, tstrerror(code)); + +end: + if(code != 0 && cb != NULL){ + if(code == TSDB_CODE_TMQ_SAME_COMMITTED_VALUE) code = TSDB_CODE_SUCCESS; + cb(tmq, code, param); + } +} + void updateEpCallbackFn(tmq_t* pTmq, int32_t code, SDataBuf* pDataBuf, void* param) { SAskEpInfo* pInfo = param; pInfo->code = code; @@ -2468,7 +2600,10 @@ int32_t tmqCommitDone(SMqCommitCbParamSet* pParamSet) { } // if no more waiting rsp - pParamSet->callbackFn(tmq, pParamSet->code, pParamSet->userParam); + if(pParamSet->callbackFn != NULL){ + pParamSet->callbackFn(tmq, pParamSet->code, pParamSet->userParam); + } + taosMemoryFree(pParamSet); // tmq->needReportOffsetRows = true; @@ -2479,12 +2614,10 @@ int32_t tmqCommitDone(SMqCommitCbParamSet* pParamSet) { void commitRspCountDown(SMqCommitCbParamSet* pParamSet, int64_t consumerId, const char* pTopic, int32_t vgId) { int32_t waitingRspNum = atomic_sub_fetch_32(&pParamSet->waitingRspNum, 1); if (waitingRspNum == 0) { - tscDebug("consumer:0x%" PRIx64 " topic:%s vgId:%d all commit-rsp received, commit completed", consumerId, pTopic, - vgId); + tscInfo("consumer:0x%" PRIx64 " topic:%s vgId:%d all commit-rsp received, commit completed", consumerId, pTopic, vgId); tmqCommitDone(pParamSet); } else { - tscDebug("consumer:0x%" PRIx64 " topic:%s vgId:%d commit-rsp received, remain:%d", consumerId, pTopic, vgId, - waitingRspNum); + tscInfo("consumer:0x%" PRIx64 " topic:%s vgId:%d commit-rsp received, remain:%d", consumerId, pTopic, vgId, waitingRspNum); } } @@ -2530,7 +2663,7 @@ static int32_t tmqGetWalInfoCb(void* param, SDataBuf* pMsg, 
int32_t code) { SMqRspHead* pHead = pMsg->pData; tmq_topic_assignment assignment = {.begin = pHead->walsver, - .end = pHead->walever, + .end = pHead->walever + 1, .currentOffset = rsp.rspOffset.version, .vgId = pParam->vgId}; @@ -2543,6 +2676,8 @@ static int32_t tmqGetWalInfoCb(void* param, SDataBuf* pMsg, int32_t code) { tsem_post(&pCommon->rsp); } + taosMemoryFree(pMsg->pData); + taosMemoryFree(pMsg->pEpSet); taosMemoryFree(pParam); return 0; } @@ -2565,21 +2700,228 @@ static bool isInSnapshotMode(int8_t type, bool useSnapshot){ return false; } +static int32_t tmCommittedCb(void* param, SDataBuf* pMsg, int32_t code) { + SMqCommittedParam* pParam = param; + + if (code != 0){ + goto end; + } + if (pMsg) { + SDecoder decoder; + tDecoderInit(&decoder, (uint8_t*)pMsg->pData, pMsg->len); + if (tDecodeMqVgOffset(&decoder, &pParam->vgOffset) < 0) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto end; + } + tDecoderClear(&decoder); + } + + end: + if(pMsg){ + taosMemoryFree(pMsg->pData); + taosMemoryFree(pMsg->pEpSet); + } + pParam->code = code; + tsem_post(&pParam->sem); + return 0; +} + +int64_t getCommittedFromServer(tmq_t *tmq, char* tname, int32_t vgId, SEpSet* epSet){ + int32_t code = 0; + SMqVgOffset pOffset = {0}; + + pOffset.consumerId = tmq->consumerId; + + int32_t groupLen = strlen(tmq->groupId); + memcpy(pOffset.offset.subKey, tmq->groupId, groupLen); + pOffset.offset.subKey[groupLen] = TMQ_SEPARATOR; + strcpy(pOffset.offset.subKey + groupLen + 1, tname); + + int32_t len = 0; + tEncodeSize(tEncodeMqVgOffset, &pOffset, len, code); + if (code < 0) { + return TSDB_CODE_INVALID_PARA; + } + + void* buf = taosMemoryCalloc(1, sizeof(SMsgHead) + len); + if (buf == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + ((SMsgHead*)buf)->vgId = htonl(vgId); + + void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); + + SEncoder encoder; + tEncoderInit(&encoder, abuf, len); + tEncodeMqVgOffset(&encoder, &pOffset); + tEncoderClear(&encoder); + + SMsgSendInfo* sendInfo = 
taosMemoryCalloc(1, sizeof(SMsgSendInfo)); + if (sendInfo == NULL) { + taosMemoryFree(buf); + return TSDB_CODE_OUT_OF_MEMORY; + } + + SMqCommittedParam* pParam = taosMemoryMalloc(sizeof(SMqCommittedParam)); + if (pParam == NULL) { + taosMemoryFree(buf); + taosMemoryFree(sendInfo); + return TSDB_CODE_OUT_OF_MEMORY; + } + tsem_init(&pParam->sem, 0, 0); + + sendInfo->msgInfo = (SDataBuf){.pData = buf, .len = sizeof(SMsgHead) + len, .handle = NULL}; + sendInfo->requestId = generateRequestId(); + sendInfo->requestObjRefId = 0; + sendInfo->param = pParam; + sendInfo->fp = tmCommittedCb; + sendInfo->msgType = TDMT_VND_TMQ_VG_COMMITTEDINFO; + + int64_t transporterId = 0; + asyncSendMsgToServer(tmq->pTscObj->pAppInfo->pTransporter, epSet, &transporterId, sendInfo); + + tsem_wait(&pParam->sem); + code = pParam->code; + if(code == TSDB_CODE_SUCCESS){ + if(pParam->vgOffset.offset.val.type == TMQ_OFFSET__LOG){ + code = pParam->vgOffset.offset.val.version; + }else{ + code = TSDB_CODE_TMQ_SNAPSHOT_ERROR; + } + } + tsem_destroy(&pParam->sem); + taosMemoryFree(pParam); + + return code; +} + +int64_t tmq_position(tmq_t *tmq, const char *pTopicName, int32_t vgId){ + if (tmq == NULL || pTopicName == NULL) { + tscError("invalid tmq handle, null"); + return TSDB_CODE_INVALID_PARA; + } + + int32_t accId = tmq->pTscObj->acctId; + char tname[TSDB_TOPIC_FNAME_LEN] = {0}; + sprintf(tname, "%d.%s", accId, pTopicName); + + taosWLockLatch(&tmq->lock); + + SMqClientVg* pVg = NULL; + int32_t code = getClientVg(tmq, tname, vgId, &pVg); + if(code != 0){ + taosWUnLockLatch(&tmq->lock); + return code; + } + + SVgOffsetInfo* pOffsetInfo = &pVg->offsetInfo; + int32_t type = pOffsetInfo->endOffset.type; + if (isInSnapshotMode(type, tmq->useSnapshot)) { + tscError("consumer:0x%" PRIx64 " offset type:%d not wal version, position error", tmq->consumerId, type); + taosWUnLockLatch(&tmq->lock); + return TSDB_CODE_TMQ_SNAPSHOT_ERROR; + } + + code = checkWalRange(pOffsetInfo, -1); + if (code != 0) { + 
taosWUnLockLatch(&tmq->lock); + return code; + } + SEpSet epSet = pVg->epSet; + int64_t begin = pVg->offsetInfo.walVerBegin; + int64_t end = pVg->offsetInfo.walVerEnd; + taosWUnLockLatch(&tmq->lock); + + int64_t position = 0; + if(type == TMQ_OFFSET__LOG){ + position = pOffsetInfo->endOffset.version; + }else if(type == TMQ_OFFSET__RESET_EARLIEST || type == TMQ_OFFSET__RESET_LATEST){ + code = getCommittedFromServer(tmq, tname, vgId, &epSet); + if(code == TSDB_CODE_TMQ_NO_COMMITTED){ + if(type == TMQ_OFFSET__RESET_EARLIEST){ + position = begin; + } else if(type == TMQ_OFFSET__RESET_LATEST){ + position = end; + } + }else{ + position = code; + } + }else{ + tscError("consumer:0x%" PRIx64 " offset type:%d can not be reach here", tmq->consumerId, type); + } + + tscInfo("consumer:0x%" PRIx64 " tmq_position vgId:%d position:%" PRId64, tmq->consumerId, vgId, position); + return position; +} + +int64_t tmq_committed(tmq_t *tmq, const char *pTopicName, int32_t vgId){ + if (tmq == NULL || pTopicName == NULL) { + tscError("invalid tmq handle, null"); + return TSDB_CODE_INVALID_PARA; + } + + int32_t accId = tmq->pTscObj->acctId; + char tname[TSDB_TOPIC_FNAME_LEN] = {0}; + sprintf(tname, "%d.%s", accId, pTopicName); + + taosWLockLatch(&tmq->lock); + + SMqClientVg* pVg = NULL; + int32_t code = getClientVg(tmq, tname, vgId, &pVg); + if(code != 0){ + taosWUnLockLatch(&tmq->lock); + return code; + } + + SVgOffsetInfo* pOffsetInfo = &pVg->offsetInfo; + if (isInSnapshotMode(pOffsetInfo->endOffset.type, tmq->useSnapshot)) { + tscError("consumer:0x%" PRIx64 " offset type:%d not wal version, committed error", tmq->consumerId, pOffsetInfo->endOffset.type); + taosWUnLockLatch(&tmq->lock); + return TSDB_CODE_TMQ_SNAPSHOT_ERROR; + } + + if (isInSnapshotMode(pOffsetInfo->committedOffset.type, tmq->useSnapshot)) { + tscError("consumer:0x%" PRIx64 " offset type:%d not wal version, committed error", tmq->consumerId, pOffsetInfo->committedOffset.type); + taosWUnLockLatch(&tmq->lock); + return 
TSDB_CODE_TMQ_SNAPSHOT_ERROR; + } + + int64_t committed = 0; + if(pOffsetInfo->committedOffset.type == TMQ_OFFSET__LOG){ + committed = pOffsetInfo->committedOffset.version; + taosWUnLockLatch(&tmq->lock); + goto end; + } + SEpSet epSet = pVg->epSet; + taosWUnLockLatch(&tmq->lock); + + committed = getCommittedFromServer(tmq, tname, vgId, &epSet); + +end: + tscInfo("consumer:0x%" PRIx64 " tmq_committed vgId:%d committed:%" PRId64, tmq->consumerId, vgId, committed); + return committed; +} + int32_t tmq_get_topic_assignment(tmq_t* tmq, const char* pTopicName, tmq_topic_assignment** assignment, int32_t* numOfAssignment) { + if(tmq == NULL || pTopicName == NULL || assignment == NULL || numOfAssignment == NULL){ + tscError("invalid tmq handle, null"); + return TSDB_CODE_INVALID_PARA; + } *numOfAssignment = 0; *assignment = NULL; SMqVgCommon* pCommon = NULL; int32_t accId = tmq->pTscObj->acctId; - char tname[128] = {0}; + char tname[TSDB_TOPIC_FNAME_LEN] = {0}; sprintf(tname, "%d.%s", accId, pTopicName); int32_t code = TSDB_CODE_SUCCESS; taosWLockLatch(&tmq->lock); SMqClientTopic* pTopic = getTopicByName(tmq, tname); if (pTopic == NULL) { - code = TSDB_CODE_INVALID_PARA; + code = TSDB_CODE_TMQ_INVALID_TOPIC; goto end; } @@ -2587,7 +2929,7 @@ int32_t tmq_get_topic_assignment(tmq_t* tmq, const char* pTopicName, tmq_topic_a *numOfAssignment = taosArrayGetSize(pTopic->vgs); for (int32_t j = 0; j < (*numOfAssignment); ++j) { SMqClientVg* pClientVg = taosArrayGet(pTopic->vgs, j); - int32_t type = pClientVg->offsetInfo.currentOffset.type; + int32_t type = pClientVg->offsetInfo.beginOffset.type; if (isInSnapshotMode(type, tmq->useSnapshot)) { tscError("consumer:0x%" PRIx64 " offset type:%d not wal version, assignment not allowed", tmq->consumerId, type); code = TSDB_CODE_TMQ_SNAPSHOT_ERROR; @@ -2607,13 +2949,13 @@ int32_t tmq_get_topic_assignment(tmq_t* tmq, const char* pTopicName, tmq_topic_a for (int32_t j = 0; j < (*numOfAssignment); ++j) { SMqClientVg* pClientVg = 
taosArrayGet(pTopic->vgs, j); - if (pClientVg->offsetInfo.currentOffset.type != TMQ_OFFSET__LOG) { + if (pClientVg->offsetInfo.beginOffset.type != TMQ_OFFSET__LOG) { needFetch = true; break; } tmq_topic_assignment* pAssignment = &(*assignment)[j]; - pAssignment->currentOffset = pClientVg->offsetInfo.currentOffset.version; + pAssignment->currentOffset = pClientVg->offsetInfo.beginOffset.version; pAssignment->begin = pClientVg->offsetInfo.walVerBegin; pAssignment->end = pClientVg->offsetInfo.walVerEnd; pAssignment->vgId = pClientVg->vgId; @@ -2652,6 +2994,7 @@ int32_t tmq_get_topic_assignment(tmq_t* tmq, const char* pTopicName, tmq_topic_a SMqPollReq req = {0}; tmqBuildConsumeReqImpl(&req, tmq, 10, pTopic, pClientVg); + req.reqOffset = pClientVg->offsetInfo.beginOffset; int32_t msgSize = tSerializeSMqPollReq(NULL, 0, &req); if (msgSize < 0) { @@ -2691,7 +3034,7 @@ int32_t tmq_get_topic_assignment(tmq_t* tmq, const char* pTopicName, tmq_topic_a int64_t transporterId = 0; char offsetFormatBuf[TSDB_OFFSET_LEN] = {0}; - tFormatOffset(offsetFormatBuf, tListLen(offsetFormatBuf), &pClientVg->offsetInfo.currentOffset); + tFormatOffset(offsetFormatBuf, tListLen(offsetFormatBuf), &pClientVg->offsetInfo.beginOffset); tscInfo("consumer:0x%" PRIx64 " %s retrieve wal info vgId:%d, epoch %d, req:%s, reqId:0x%" PRIx64, tmq->consumerId, pTopic->topicName, pClientVg->vgId, tmq->epoch, offsetFormatBuf, req.reqId); @@ -2721,7 +3064,7 @@ int32_t tmq_get_topic_assignment(tmq_t* tmq, const char* pTopicName, tmq_topic_a } SVgOffsetInfo* pOffsetInfo = &pClientVg->offsetInfo; - tscInfo("vgId:%d offset is update to:%"PRId64, p->vgId, p->currentOffset); + tscInfo("consumer:0x%" PRIx64 " %s vgId:%d offset is update to:%"PRId64, tmq->consumerId, pTopic->topicName, p->vgId, p->currentOffset); pOffsetInfo->walVerBegin = p->begin; pOffsetInfo->walVerEnd = p->end; @@ -2748,88 +3091,111 @@ void tmq_free_assignment(tmq_topic_assignment* pAssignment) { taosMemoryFree(pAssignment); } +static int32_t 
tmqSeekCb(void* param, SDataBuf* pMsg, int32_t code) { + if (pMsg) { + taosMemoryFree(pMsg->pData); + taosMemoryFree(pMsg->pEpSet); + } + SMqSeekParam* pParam = param; + pParam->code = code; + tsem_post(&pParam->sem); + return 0; +} + +// seek interface have to send msg to server to cancel push handle if needed, because consumer may be in wait status if there is no data to poll int32_t tmq_offset_seek(tmq_t* tmq, const char* pTopicName, int32_t vgId, int64_t offset) { - if (tmq == NULL) { + if (tmq == NULL || pTopicName == NULL) { tscError("invalid tmq handle, null"); return TSDB_CODE_INVALID_PARA; } int32_t accId = tmq->pTscObj->acctId; - char tname[128] = {0}; + char tname[TSDB_TOPIC_FNAME_LEN] = {0}; sprintf(tname, "%d.%s", accId, pTopicName); taosWLockLatch(&tmq->lock); - SMqClientTopic* pTopic = getTopicByName(tmq, tname); - if (pTopic == NULL) { - tscError("consumer:0x%" PRIx64 " invalid topic name:%s", tmq->consumerId, pTopicName); - taosWUnLockLatch(&tmq->lock); - return TSDB_CODE_INVALID_PARA; - } SMqClientVg* pVg = NULL; - int32_t numOfVgs = taosArrayGetSize(pTopic->vgs); - for (int32_t i = 0; i < numOfVgs; ++i) { - SMqClientVg* pClientVg = taosArrayGet(pTopic->vgs, i); - if (pClientVg->vgId == vgId) { - pVg = pClientVg; - break; - } - } - - if (pVg == NULL) { - tscError("consumer:0x%" PRIx64 " invalid vgroup id:%d", tmq->consumerId, vgId); + int32_t code = getClientVg(tmq, tname, vgId, &pVg); + if(code != 0){ taosWUnLockLatch(&tmq->lock); - return TSDB_CODE_INVALID_PARA; + return code; } SVgOffsetInfo* pOffsetInfo = &pVg->offsetInfo; - int32_t type = pOffsetInfo->currentOffset.type; + int32_t type = pOffsetInfo->endOffset.type; if (isInSnapshotMode(type, tmq->useSnapshot)) { tscError("consumer:0x%" PRIx64 " offset type:%d not wal version, seek not allowed", tmq->consumerId, type); taosWUnLockLatch(&tmq->lock); return TSDB_CODE_TMQ_SNAPSHOT_ERROR; } - if (type == TMQ_OFFSET__LOG && (offset < pOffsetInfo->walVerBegin || offset > pOffsetInfo->walVerEnd)) { 
- tscError("consumer:0x%" PRIx64 " invalid seek params, offset:%" PRId64 ", valid range:[%" PRId64 ", %" PRId64 "]", - tmq->consumerId, offset, pOffsetInfo->walVerBegin, pOffsetInfo->walVerEnd); + code = checkWalRange(pOffsetInfo, offset); + if (code != 0) { taosWUnLockLatch(&tmq->lock); - return TSDB_CODE_INVALID_PARA; + return code; } + tscInfo("consumer:0x%" PRIx64 " seek to %" PRId64 " on vgId:%d", tmq->consumerId, offset, vgId); // update the offset, and then commit to vnode - pOffsetInfo->currentOffset.type = TMQ_OFFSET__LOG; - pOffsetInfo->currentOffset.version = offset >= 1 ? offset - 1 : 0; -// pOffsetInfo->committedOffset.version = INT64_MIN; + pOffsetInfo->endOffset.type = TMQ_OFFSET__LOG; + pOffsetInfo->endOffset.version = offset; + pOffsetInfo->beginOffset = pOffsetInfo->endOffset; pVg->seekUpdated = true; - - tscInfo("consumer:0x%" PRIx64 " seek to %" PRId64 " on vgId:%d", tmq->consumerId, offset, pVg->vgId); + SEpSet epSet = pVg->epSet; taosWUnLockLatch(&tmq->lock); -// SMqRspObj rspObj = {.resType = RES_TYPE__TMQ, .vgId = pVg->vgId}; -// tstrncpy(rspObj.topic, tname, tListLen(rspObj.topic)); -// -// SSyncCommitInfo* pInfo = taosMemoryMalloc(sizeof(SSyncCommitInfo)); -// if (pInfo == NULL) { -// tscError("consumer:0x%"PRIx64" failed to prepare seek operation", tmq->consumerId); -// return TSDB_CODE_OUT_OF_MEMORY; -// } -// -// tsem_init(&pInfo->sem, 0, 0); -// pInfo->code = 0; -// -// asyncCommitOffset(tmq, &rspObj, TDMT_VND_TMQ_SEEK_TO_OFFSET, commitCallBackFn, pInfo); -// -// tsem_wait(&pInfo->sem); -// int32_t code = pInfo->code; -// -// tsem_destroy(&pInfo->sem); -// taosMemoryFree(pInfo); -// -// if (code != TSDB_CODE_SUCCESS) { -// tscError("consumer:0x%" PRIx64 " failed to send seek to vgId:%d, code:%s", tmq->consumerId, pVg->vgId, tstrerror(code)); -// } + SMqSeekReq req = {0}; + snprintf(req.subKey, TSDB_SUBSCRIBE_KEY_LEN, "%s:%s", tmq->groupId, tname); + req.head.vgId = vgId; + req.consumerId = tmq->consumerId; - return 0; -} \ No newline 
at end of file + int32_t msgSize = tSerializeSMqSeekReq(NULL, 0, &req); + if (msgSize < 0) { + return TSDB_CODE_PAR_INTERNAL_ERROR; + } + + char* msg = taosMemoryCalloc(1, msgSize); + if (NULL == msg) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + if (tSerializeSMqSeekReq(msg, msgSize, &req) < 0) { + taosMemoryFree(msg); + return TSDB_CODE_PAR_INTERNAL_ERROR; + } + + SMsgSendInfo* sendInfo = taosMemoryCalloc(1, sizeof(SMsgSendInfo)); + if (sendInfo == NULL) { + taosMemoryFree(msg); + return TSDB_CODE_OUT_OF_MEMORY; + } + + SMqSeekParam* pParam = taosMemoryMalloc(sizeof(SMqSeekParam)); + if (pParam == NULL) { + taosMemoryFree(msg); + taosMemoryFree(sendInfo); + return TSDB_CODE_OUT_OF_MEMORY; + } + tsem_init(&pParam->sem, 0, 0); + + sendInfo->msgInfo = (SDataBuf){.pData = msg, .len = msgSize, .handle = NULL}; + sendInfo->requestId = generateRequestId(); + sendInfo->requestObjRefId = 0; + sendInfo->param = pParam; + sendInfo->fp = tmqSeekCb; + sendInfo->msgType = TDMT_VND_TMQ_SEEK; + + int64_t transporterId = 0; + asyncSendMsgToServer(tmq->pTscObj->pAppInfo->pTransporter, &epSet, &transporterId, sendInfo); + + tsem_wait(&pParam->sem); + code = pParam->code; + tsem_destroy(&pParam->sem); + taosMemoryFree(pParam); + + tscInfo("consumer:0x%" PRIx64 "send seek to vgId:%d, return code:%s", tmq->consumerId, vgId, tstrerror(code)); + + return code; +} diff --git a/source/client/test/clientTests.cpp b/source/client/test/clientTests.cpp index 3c46d1780222157c600e604c9198e20ca045effd..6f978b0143b8f30de1d83f2db3e0d404c6aec44d 100644 --- a/source/client/test/clientTests.cpp +++ b/source/client/test/clientTests.cpp @@ -34,6 +34,8 @@ namespace { void printSubResults(void* pRes, int32_t* totalRows) { char buf[1024]; + int32_t vgId = tmq_get_vgroup_id(pRes); + int64_t offset = tmq_get_vgroup_offset(pRes); while (1) { TAOS_ROW row = taos_fetch_row(pRes); if (row == NULL) { @@ -45,7 +47,7 @@ void printSubResults(void* pRes, int32_t* totalRows) { int32_t precision = 
taos_result_precision(pRes); taos_print_row(buf, row, fields, numOfFields); *totalRows += 1; - printf("precision: %d, row content: %s\n", precision, buf); + printf("vgId: %d, offset: %lld, precision: %d, row content: %s\n", vgId, offset, precision, buf); } // taos_free_result(pRes); @@ -1073,6 +1075,98 @@ TEST(clientCase, sub_db_test) { fprintf(stderr, "%d msg consumed, include %d rows\n", msgCnt, totalRows); } +TEST(clientCase, tmq_commit) { +// taos_options(TSDB_OPTION_CONFIGDIR, "~/first/cfg"); + + TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); + ASSERT_NE(pConn, nullptr); + + tmq_conf_t* conf = tmq_conf_new(); + + tmq_conf_set(conf, "enable.auto.commit", "false"); + tmq_conf_set(conf, "auto.commit.interval.ms", "2000"); + tmq_conf_set(conf, "group.id", "group_id_2"); + tmq_conf_set(conf, "td.connect.user", "root"); + tmq_conf_set(conf, "td.connect.pass", "taosdata"); + tmq_conf_set(conf, "auto.offset.reset", "earliest"); + tmq_conf_set(conf, "msg.with.table.name", "true"); + + tmq_t* tmq = tmq_consumer_new(conf, NULL, 0); + tmq_conf_destroy(conf); + + char topicName[128] = "tp"; + // 创建订阅 topics 列表 + tmq_list_t* topicList = tmq_list_new(); + tmq_list_append(topicList, topicName); + + // 启动订阅 + tmq_subscribe(tmq, topicList); + tmq_list_destroy(topicList); + + int32_t totalRows = 0; + int32_t msgCnt = 0; + int32_t timeout = 2000; + + tmq_topic_assignment* pAssign = NULL; + int32_t numOfAssign = 0; + + int32_t code = tmq_get_topic_assignment(tmq, topicName, &pAssign, &numOfAssign); + if (code != 0) { + printf("error occurs:%s\n", tmq_err2str(code)); + tmq_free_assignment(pAssign); + tmq_consumer_close(tmq); + taos_close(pConn); + fprintf(stderr, "%d msg consumed, include %d rows\n", msgCnt, totalRows); + return; + } + + for(int i = 0; i < numOfAssign; i++){ + printf("assign i:%d, vgId:%d, offset:%lld, start:%lld, end:%lld\n", i, pAssign[i].vgId, pAssign[i].currentOffset, pAssign[i].begin, pAssign[i].end); + + int64_t committed = 
tmq_committed(tmq, topicName, pAssign[i].vgId); + printf("committed vgId:%d, committed:%lld\n", pAssign[i].vgId, committed); + + int64_t position = tmq_position(tmq, topicName, pAssign[i].vgId); + printf("position vgId:%d, position:%lld\n", pAssign[i].vgId, position); + tmq_offset_seek(tmq, topicName, pAssign[i].vgId, 1); + position = tmq_position(tmq, topicName, pAssign[i].vgId); + printf("after seek 1, position vgId:%d, position:%lld\n", pAssign[i].vgId, position); + } + + while (1) { + printf("start to poll\n"); + TAOS_RES* pRes = tmq_consumer_poll(tmq, timeout); + if (pRes) { + printSubResults(pRes, &totalRows); + } else { + break; + } + + tmq_commit_sync(tmq, pRes); + for(int i = 0; i < numOfAssign; i++) { + int64_t committed = tmq_committed(tmq, topicName, pAssign[i].vgId); + printf("committed vgId:%d, committed:%lld\n", pAssign[i].vgId, committed); + if(committed > 0){ + int32_t code = tmq_commit_offset_sync(tmq, topicName, pAssign[i].vgId, 4); + printf("tmq_commit_offset_sync vgId:%d, offset:4, code:%d\n", pAssign[i].vgId, code); + int64_t committed = tmq_committed(tmq, topicName, pAssign[i].vgId); + printf("after tmq_commit_offset_sync, committed vgId:%d, committed:%lld\n", pAssign[i].vgId, committed); + } + } + if (pRes != NULL) { + taos_free_result(pRes); + } + +// tmq_offset_seek(tmq, "tp", pAssign[0].vgId, pAssign[0].begin); + } + + tmq_free_assignment(pAssign); + + tmq_consumer_close(tmq); + taos_close(pConn); + fprintf(stderr, "%d msg consumed, include %d rows\n", msgCnt, totalRows); +} + TEST(clientCase, td_25129) { // taos_options(TSDB_OPTION_CONFIGDIR, "~/first/cfg"); @@ -1092,9 +1186,10 @@ TEST(clientCase, td_25129) { tmq_t* tmq = tmq_consumer_new(conf, NULL, 0); tmq_conf_destroy(conf); + char topicName[128] = "tp"; // 创建订阅 topics 列表 tmq_list_t* topicList = tmq_list_new(); - tmq_list_append(topicList, "tp"); + tmq_list_append(topicList, topicName); // 启动订阅 tmq_subscribe(tmq, topicList); @@ -1112,7 +1207,7 @@ TEST(clientCase, td_25129) { 
tmq_topic_assignment* pAssign = NULL; int32_t numOfAssign = 0; - int32_t code = tmq_get_topic_assignment(tmq, "tp", &pAssign, &numOfAssign); + int32_t code = tmq_get_topic_assignment(tmq, topicName, &pAssign, &numOfAssign); if (code != 0) { printf("error occurs:%s\n", tmq_err2str(code)); tmq_free_assignment(pAssign); @@ -1129,7 +1224,7 @@ TEST(clientCase, td_25129) { // tmq_offset_seek(tmq, "tp", pAssign[0].vgId, 4); tmq_free_assignment(pAssign); - code = tmq_get_topic_assignment(tmq, "tp", &pAssign, &numOfAssign); + code = tmq_get_topic_assignment(tmq, topicName, &pAssign, &numOfAssign); if (code != 0) { printf("error occurs:%s\n", tmq_err2str(code)); tmq_free_assignment(pAssign); @@ -1145,7 +1240,7 @@ TEST(clientCase, td_25129) { tmq_free_assignment(pAssign); - code = tmq_get_topic_assignment(tmq, "tp", &pAssign, &numOfAssign); + code = tmq_get_topic_assignment(tmq, topicName, &pAssign, &numOfAssign); if (code != 0) { printf("error occurs:%s\n", tmq_err2str(code)); tmq_free_assignment(pAssign); @@ -1156,10 +1251,12 @@ TEST(clientCase, td_25129) { } for(int i = 0; i < numOfAssign; i++){ - printf("assign i:%d, vgId:%d, offset:%lld, start:%lld, end:%lld\n", i, pAssign[i].vgId, pAssign[i].currentOffset, pAssign[i].begin, pAssign[i].end); + int64_t committed = tmq_committed(tmq, topicName, pAssign[i].vgId); + printf("assign i:%d, vgId:%d, committed:%lld, offset:%lld, start:%lld, end:%lld\n", i, pAssign[i].vgId, committed, pAssign[i].currentOffset, pAssign[i].begin, pAssign[i].end); } while (1) { + printf("start to poll\n"); TAOS_RES* pRes = tmq_consumer_poll(tmq, timeout); if (pRes) { char buf[128]; @@ -1173,10 +1270,26 @@ TEST(clientCase, td_25129) { // printf("vgroup id: %d\n", vgroupId); printSubResults(pRes, &totalRows); + + code = tmq_get_topic_assignment(tmq, topicName, &pAssign, &numOfAssign); + if (code != 0) { + printf("error occurs:%s\n", tmq_err2str(code)); + tmq_free_assignment(pAssign); + tmq_consumer_close(tmq); + taos_close(pConn); + fprintf(stderr, "%d 
msg consumed, include %d rows\n", msgCnt, totalRows); + return; + } + + for(int i = 0; i < numOfAssign; i++){ + printf("assign i:%d, vgId:%d, offset:%lld, start:%lld, end:%lld\n", i, pAssign[i].vgId, pAssign[i].currentOffset, pAssign[i].begin, pAssign[i].end); + } } else { - tmq_offset_seek(tmq, "tp", pAssign[0].vgId, pAssign[0].currentOffset); - tmq_offset_seek(tmq, "tp", pAssign[1].vgId, pAssign[1].currentOffset); - continue; + for(int i = 0; i < numOfAssign; i++) { + tmq_offset_seek(tmq, topicName, pAssign[i].vgId, pAssign[i].currentOffset); + } + tmq_commit_sync(tmq, pRes); + break; } // tmq_commit_sync(tmq, pRes); @@ -1208,6 +1321,7 @@ TEST(clientCase, td_25129) { printf("assign i:%d, vgId:%d, offset:%lld, start:%lld, end:%lld\n", i, pAssign[i].vgId, pAssign[i].currentOffset, pAssign[i].begin, pAssign[i].end); } + tmq_free_assignment(pAssign); tmq_consumer_close(tmq); taos_close(pConn); fprintf(stderr, "%d msg consumed, include %d rows\n", msgCnt, totalRows); diff --git a/source/common/src/systable.c b/source/common/src/systable.c index 6fdc74f6928d6f9d1f2920be9280ce505dc3b8f3..0940fcef6a749489077d45f931f4c27e8800f4ee 100644 --- a/source/common/src/systable.c +++ b/source/common/src/systable.c @@ -33,7 +33,7 @@ static const SSysDbTableSchema dnodesSchema[] = { {.name = "support_vnodes", .bytes = 2, .type = TSDB_DATA_TYPE_SMALLINT, .sysInfo = true}, {.name = "status", .bytes = 10 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = true}, {.name = "create_time", .bytes = 8, .type = TSDB_DATA_TYPE_TIMESTAMP, .sysInfo = true}, - {.name = "reboot_time", .bytes = 8, .type = TSDB_DATA_TYPE_TIMESTAMP, .sysInfo = true}, + {.name = "reboot_time", .bytes = 8, .type = TSDB_DATA_TYPE_TIMESTAMP, .sysInfo = true}, {.name = "note", .bytes = 256 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = true}, #ifdef TD_ENTERPRISE {.name = "active_code", .bytes = TSDB_ACTIVE_KEY_LEN + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = true}, @@ 
-47,7 +47,7 @@ static const SSysDbTableSchema mnodesSchema[] = { {.name = "role", .bytes = 12 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = true}, {.name = "status", .bytes = 9 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = true}, {.name = "create_time", .bytes = 8, .type = TSDB_DATA_TYPE_TIMESTAMP, .sysInfo = true}, - {.name = "reboot_time", .bytes = 8, .type = TSDB_DATA_TYPE_TIMESTAMP, .sysInfo = true}, + {.name = "role_time", .bytes = 8, .type = TSDB_DATA_TYPE_TIMESTAMP, .sysInfo = true}, }; static const SSysDbTableSchema modulesSchema[] = { @@ -73,7 +73,7 @@ static const SSysDbTableSchema clusterSchema[] = { {.name = "name", .bytes = TSDB_CLUSTER_ID_LEN + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = true}, {.name = "uptime", .bytes = 4, .type = TSDB_DATA_TYPE_INT, .sysInfo = true}, {.name = "create_time", .bytes = 8, .type = TSDB_DATA_TYPE_TIMESTAMP, .sysInfo = true}, - {.name = "version", .bytes = 10 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = true}, + {.name = "version", .bytes = 10 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = true}, {.name = "expire_time", .bytes = 8, .type = TSDB_DATA_TYPE_TIMESTAMP, .sysInfo = true}, }; @@ -271,6 +271,7 @@ static const SSysDbTableSchema variablesSchema[] = { {.name = "dnode_id", .bytes = 4, .type = TSDB_DATA_TYPE_INT}, {.name = "name", .bytes = TSDB_CONFIG_OPTION_LEN + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = true}, {.name = "value", .bytes = TSDB_CONFIG_VALUE_LEN + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = true}, + {.name = "scope", .bytes = TSDB_CONFIG_SCOPE_LEN + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = true}, }; static const SSysDbTableSchema topicSchema[] = { diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index b2f03fa7ba54606a924214f98559c818bbef3ac9..887a11083153c0daa3bf6f06a36ae49d6c65dab5 100644 --- 
a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -632,7 +632,10 @@ int32_t blockDataToBuf(char* buf, const SSDataBlock* pBlock) { pStart += colSize; } } else { - memcpy(pStart, pCol->pData, dataSize); + if (dataSize != 0) { + // ubsan reports error if pCol->pData==NULL && dataSize==0 + memcpy(pStart, pCol->pData, dataSize); + } pStart += dataSize; } } @@ -684,8 +687,10 @@ int32_t blockDataFromBuf(SSDataBlock* pBlock, const char* buf) { return TSDB_CODE_FAILED; } } - - memcpy(pCol->pData, pStart, colLength); + if (colLength != 0) { + // ubsan reports error if colLength==0 && pCol->pData == 0 + memcpy(pCol->pData, pStart, colLength); + } pStart += colLength; } diff --git a/source/common/src/tdataformat.c b/source/common/src/tdataformat.c index 0b121777544b33c918ca5c421e78e1b644f3109b..7c6939635a290adae506c2b712ef209c7a888326 100644 --- a/source/common/src/tdataformat.c +++ b/source/common/src/tdataformat.c @@ -2245,15 +2245,18 @@ static int32_t tColDataUpdateValue72(SColData *pColData, uint8_t *pData, uint32_ } return 0; } +static FORCE_INLINE int32_t tColDataUpdateNothing(SColData *pColData, uint8_t *pData, uint32_t nData, bool forward) { + return 0; +} static int32_t (*tColDataUpdateValueImpl[8][3])(SColData *pColData, uint8_t *pData, uint32_t nData, bool forward) = { - {NULL, NULL, NULL}, // 0 - {tColDataUpdateValue10, NULL, tColDataUpdateValue12}, // HAS_NONE - {tColDataUpdateValue20, NULL, NULL}, // HAS_NULL - {tColDataUpdateValue30, NULL, tColDataUpdateValue32}, // HAS_NULL|HAS_NONE - {tColDataUpdateValue40, NULL, tColDataUpdateValue42}, // HAS_VALUE - {tColDataUpdateValue50, NULL, tColDataUpdateValue52}, // HAS_VALUE|HAS_NONE - {tColDataUpdateValue60, NULL, tColDataUpdateValue62}, // HAS_VALUE|HAS_NULL - {tColDataUpdateValue70, NULL, tColDataUpdateValue72}, // HAS_VALUE|HAS_NULL|HAS_NONE + {NULL, NULL, NULL}, // 0 + {tColDataUpdateValue10, tColDataUpdateNothing, tColDataUpdateValue12}, // HAS_NONE + {tColDataUpdateValue20, 
tColDataUpdateNothing, tColDataUpdateNothing}, // HAS_NULL + {tColDataUpdateValue30, tColDataUpdateNothing, tColDataUpdateValue32}, // HAS_NULL|HAS_NONE + {tColDataUpdateValue40, tColDataUpdateNothing, tColDataUpdateValue42}, // HAS_VALUE + {tColDataUpdateValue50, tColDataUpdateNothing, tColDataUpdateValue52}, // HAS_VALUE|HAS_NONE + {tColDataUpdateValue60, tColDataUpdateNothing, tColDataUpdateValue62}, // HAS_VALUE|HAS_NULL + {tColDataUpdateValue70, tColDataUpdateNothing, tColDataUpdateValue72}, // HAS_VALUE|HAS_NULL|HAS_NONE // VALUE NONE NULL }; diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index 611a273353f777d86fce6e498bedce2df2a599f9..0546ed7f470e50a35caf1258cadbd13862df94ba 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -55,7 +55,7 @@ int32_t tsNumOfMnodeQueryThreads = 4; int32_t tsNumOfMnodeFetchThreads = 1; int32_t tsNumOfMnodeReadThreads = 1; int32_t tsNumOfVnodeQueryThreads = 4; -float tsRatioOfVnodeStreamThreads = 2.0; +float tsRatioOfVnodeStreamThreads = 4.0; int32_t tsNumOfVnodeFetchThreads = 4; int32_t tsNumOfVnodeRsmaThreads = 2; int32_t tsNumOfQnodeQueryThreads = 4; @@ -77,8 +77,14 @@ int64_t tsVndCommitMaxIntervalMs = 600 * 1000; int64_t tsMndSdbWriteDelta = 200; int64_t tsMndLogRetention = 2000; int8_t tsGrant = 1; +int32_t tsMndGrantMode = 0; bool tsMndSkipGrant = false; +// dnode +int64_t tsDndStart = 0; +int64_t tsDndStartOsUptime = 0; +int64_t tsDndUpTime = 0; + // monitor bool tsEnableMonitor = true; int32_t tsMonitorInterval = 30; @@ -99,6 +105,8 @@ char *tsClientCrashReportUri = "/ccrashreport"; char *tsSvrCrashReportUri = "/dcrashreport"; // schemaless +bool tsSmlDot2Underline = true; +char tsSmlTsDefaultName[TSDB_COL_NAME_LEN] = "_ts"; char tsSmlTagName[TSDB_COL_NAME_LEN] = "_tag_null"; char tsSmlChildTableName[TSDB_TABLE_NAME_LEN] = ""; // user defined child table name can be specified in tag value. // If set to empty system will generate table name using MD5 hash. 
@@ -299,38 +307,38 @@ static int32_t taosLoadCfg(SConfig *pCfg, const char **envCmd, const char *input } int32_t taosAddClientLogCfg(SConfig *pCfg) { - if (cfgAddDir(pCfg, "configDir", configDir, 1) != 0) return -1; - if (cfgAddDir(pCfg, "scriptDir", configDir, 1) != 0) return -1; - if (cfgAddDir(pCfg, "logDir", tsLogDir, 1) != 0) return -1; - if (cfgAddFloat(pCfg, "minimalLogDirGB", 1.0f, 0.001f, 10000000, 1) != 0) return -1; - if (cfgAddInt32(pCfg, "numOfLogLines", tsNumOfLogLines, 1000, 2000000000, 1) != 0) return -1; - if (cfgAddBool(pCfg, "asyncLog", tsAsyncLog, 1) != 0) return -1; - if (cfgAddInt32(pCfg, "logKeepDays", 0, -365000, 365000, 1) != 0) return -1; - if (cfgAddInt32(pCfg, "debugFlag", 0, 0, 255, 1) != 0) return -1; - if (cfgAddInt32(pCfg, "simDebugFlag", 143, 0, 255, 1) != 0) return -1; - if (cfgAddInt32(pCfg, "tmrDebugFlag", tmrDebugFlag, 0, 255, 1) != 0) return -1; - if (cfgAddInt32(pCfg, "uDebugFlag", uDebugFlag, 0, 255, 1) != 0) return -1; - if (cfgAddInt32(pCfg, "rpcDebugFlag", rpcDebugFlag, 0, 255, 1) != 0) return -1; - if (cfgAddInt32(pCfg, "jniDebugFlag", jniDebugFlag, 0, 255, 1) != 0) return -1; - if (cfgAddInt32(pCfg, "qDebugFlag", qDebugFlag, 0, 255, 1) != 0) return -1; - if (cfgAddInt32(pCfg, "cDebugFlag", cDebugFlag, 0, 255, 1) != 0) return -1; + if (cfgAddDir(pCfg, "configDir", configDir, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddDir(pCfg, "scriptDir", configDir, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddDir(pCfg, "logDir", tsLogDir, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddFloat(pCfg, "minimalLogDirGB", 1.0f, 0.001f, 10000000, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddInt32(pCfg, "numOfLogLines", tsNumOfLogLines, 1000, 2000000000, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddBool(pCfg, "asyncLog", tsAsyncLog, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddInt32(pCfg, "logKeepDays", 0, -365000, 365000, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddInt32(pCfg, "debugFlag", 0, 0, 255, CFG_SCOPE_BOTH) != 0) return -1; + if 
(cfgAddInt32(pCfg, "simDebugFlag", 143, 0, 255, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddInt32(pCfg, "tmrDebugFlag", tmrDebugFlag, 0, 255, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddInt32(pCfg, "uDebugFlag", uDebugFlag, 0, 255, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddInt32(pCfg, "rpcDebugFlag", rpcDebugFlag, 0, 255, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddInt32(pCfg, "jniDebugFlag", jniDebugFlag, 0, 255, CFG_SCOPE_CLIENT) != 0) return -1; + if (cfgAddInt32(pCfg, "qDebugFlag", qDebugFlag, 0, 255, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddInt32(pCfg, "cDebugFlag", cDebugFlag, 0, 255, CFG_SCOPE_CLIENT) != 0) return -1; return 0; } static int32_t taosAddServerLogCfg(SConfig *pCfg) { - if (cfgAddInt32(pCfg, "dDebugFlag", dDebugFlag, 0, 255, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "vDebugFlag", vDebugFlag, 0, 255, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "mDebugFlag", mDebugFlag, 0, 255, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "wDebugFlag", wDebugFlag, 0, 255, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "sDebugFlag", sDebugFlag, 0, 255, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "tsdbDebugFlag", tsdbDebugFlag, 0, 255, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "tqDebugFlag", tqDebugFlag, 0, 255, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "fsDebugFlag", fsDebugFlag, 0, 255, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "udfDebugFlag", udfDebugFlag, 0, 255, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "smaDebugFlag", smaDebugFlag, 0, 255, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "idxDebugFlag", idxDebugFlag, 0, 255, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "tdbDebugFlag", tdbDebugFlag, 0, 255, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "metaDebugFlag", metaDebugFlag, 0, 255, 0) != 0) return -1; + if (cfgAddInt32(pCfg, "dDebugFlag", dDebugFlag, 0, 255, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "vDebugFlag", vDebugFlag, 0, 255, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "mDebugFlag", mDebugFlag, 0, 255, 
CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "wDebugFlag", wDebugFlag, 0, 255, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "sDebugFlag", sDebugFlag, 0, 255, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddInt32(pCfg, "tsdbDebugFlag", tsdbDebugFlag, 0, 255, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "tqDebugFlag", tqDebugFlag, 0, 255, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "fsDebugFlag", fsDebugFlag, 0, 255, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "udfDebugFlag", udfDebugFlag, 0, 255, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "smaDebugFlag", smaDebugFlag, 0, 255, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "idxDebugFlag", idxDebugFlag, 0, 255, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "tdbDebugFlag", tdbDebugFlag, 0, 255, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "metaDebugFlag", metaDebugFlag, 0, 255, CFG_SCOPE_SERVER) != 0) return -1; return 0; } @@ -341,53 +349,54 @@ static int32_t taosAddClientCfg(SConfig *pCfg) { strcpy(defaultFqdn, "localhost"); } - if (cfgAddString(pCfg, "firstEp", "", 1) != 0) return -1; - if (cfgAddString(pCfg, "secondEp", "", 1) != 0) return -1; - if (cfgAddString(pCfg, "fqdn", defaultFqdn, 1) != 0) return -1; - if (cfgAddInt32(pCfg, "serverPort", defaultServerPort, 1, 65056, 1) != 0) return -1; - if (cfgAddDir(pCfg, "tempDir", tsTempDir, 1) != 0) return -1; - if (cfgAddFloat(pCfg, "minimalTmpDirGB", 1.0f, 0.001f, 10000000, 1) != 0) return -1; - if (cfgAddInt32(pCfg, "shellActivityTimer", tsShellActivityTimer, 1, 120, 1) != 0) return -1; - if (cfgAddInt32(pCfg, "compressMsgSize", tsCompressMsgSize, -1, 100000000, 1) != 0) return -1; - if (cfgAddInt32(pCfg, "compressColData", tsCompressColData, -1, 100000000, 1) != 0) return -1; - if (cfgAddInt32(pCfg, "queryPolicy", tsQueryPolicy, 1, 4, 1) != 0) return -1; - if (cfgAddBool(pCfg, "enableQueryHb", tsEnableQueryHb, false) != 0) return -1; - if
(cfgAddBool(pCfg, "enableScience", tsEnableScience, false) != 0) return -1; - if (cfgAddInt32(pCfg, "querySmaOptimize", tsQuerySmaOptimize, 0, 1, 1) != 0) return -1; - if (cfgAddBool(pCfg, "queryPlannerTrace", tsQueryPlannerTrace, true) != 0) return -1; - if (cfgAddInt32(pCfg, "queryNodeChunkSize", tsQueryNodeChunkSize, 1024, 128 * 1024, true) != 0) return -1; - if (cfgAddBool(pCfg, "queryUseNodeAllocator", tsQueryUseNodeAllocator, true) != 0) return -1; - if (cfgAddBool(pCfg, "keepColumnName", tsKeepColumnName, true) != 0) return -1; - if (cfgAddString(pCfg, "smlChildTableName", "", 1) != 0) return -1; - if (cfgAddString(pCfg, "smlTagName", tsSmlTagName, 1) != 0) return -1; - // if (cfgAddBool(pCfg, "smlDataFormat", tsSmlDataFormat, 1) != 0) return -1; - // if (cfgAddInt32(pCfg, "smlBatchSize", tsSmlBatchSize, 1, INT32_MAX, true) != 0) return -1; - if (cfgAddInt32(pCfg, "maxInsertBatchRows", tsMaxInsertBatchRows, 1, INT32_MAX, true) != 0) return -1; - if (cfgAddInt32(pCfg, "maxRetryWaitTime", tsMaxRetryWaitTime, 0, 86400000, 0) != 0) return -1; - if (cfgAddBool(pCfg, "useAdapter", tsUseAdapter, true) != 0) return -1; - if (cfgAddBool(pCfg, "crashReporting", tsEnableCrashReport, true) != 0) return -1; - if (cfgAddInt64(pCfg, "queryMaxConcurrentTables", tsQueryMaxConcurrentTables, INT64_MIN, INT64_MAX, 1) != 0) - return -1; - if (cfgAddInt32(pCfg, "metaCacheMaxSize", tsMetaCacheMaxSize, -1, INT32_MAX, 1) != 0) return -1; - if (cfgAddInt32(pCfg, "slowLogThreshold", tsSlowLogThreshold, 0, INT32_MAX, true) != 0) return -1; - if (cfgAddString(pCfg, "slowLogScope", "", true) != 0) return -1; + if (cfgAddString(pCfg, "firstEp", "", CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddString(pCfg, "secondEp", "", CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddString(pCfg, "fqdn", defaultFqdn, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "serverPort", defaultServerPort, 1, 65056, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddDir(pCfg, "tempDir", tsTempDir, 
CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddFloat(pCfg, "minimalTmpDirGB", 1.0f, 0.001f, 10000000, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddInt32(pCfg, "shellActivityTimer", tsShellActivityTimer, 1, 120, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddInt32(pCfg, "compressMsgSize", tsCompressMsgSize, -1, 100000000, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddInt32(pCfg, "compressColData", tsCompressColData, -1, 100000000, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddInt32(pCfg, "queryPolicy", tsQueryPolicy, 1, 4, CFG_SCOPE_CLIENT) != 0) return -1; + if (cfgAddBool(pCfg, "enableQueryHb", tsEnableQueryHb, CFG_SCOPE_CLIENT) != 0) return -1; + if (cfgAddBool(pCfg, "enableScience", tsEnableScience, CFG_SCOPE_CLIENT) != 0) return -1; + if (cfgAddInt32(pCfg, "querySmaOptimize", tsQuerySmaOptimize, 0, 1, CFG_SCOPE_CLIENT) != 0) return -1; + if (cfgAddBool(pCfg, "queryPlannerTrace", tsQueryPlannerTrace, CFG_SCOPE_CLIENT) != 0) return -1; + if (cfgAddInt32(pCfg, "queryNodeChunkSize", tsQueryNodeChunkSize, 1024, 128 * 1024, CFG_SCOPE_CLIENT) != 0) return -1; + if (cfgAddBool(pCfg, "queryUseNodeAllocator", tsQueryUseNodeAllocator, CFG_SCOPE_CLIENT) != 0) return -1; + if (cfgAddBool(pCfg, "keepColumnName", tsKeepColumnName, CFG_SCOPE_CLIENT) != 0) return -1; + if (cfgAddString(pCfg, "smlChildTableName", "", CFG_SCOPE_CLIENT) != 0) return -1; + if (cfgAddString(pCfg, "smlTagName", tsSmlTagName, CFG_SCOPE_CLIENT) != 0) return -1; + if (cfgAddString(pCfg, "smlTsDefaultName", tsSmlTsDefaultName, CFG_SCOPE_CLIENT) != 0) return -1; + if (cfgAddBool(pCfg, "smlDot2Underline", tsSmlDot2Underline, CFG_SCOPE_CLIENT) != 0) return -1; + // if (cfgAddBool(pCfg, "smlDataFormat", tsSmlDataFormat, CFG_SCOPE_CLIENT) != 0) return -1; + // if (cfgAddInt32(pCfg, "smlBatchSize", tsSmlBatchSize, 1, INT32_MAX, CFG_SCOPE_CLIENT) != 0) return -1; + if (cfgAddInt32(pCfg, "maxInsertBatchRows", tsMaxInsertBatchRows, 1, INT32_MAX, CFG_SCOPE_CLIENT) != 0) return -1; + if (cfgAddInt32(pCfg, 
"maxRetryWaitTime", tsMaxRetryWaitTime, 0, 86400000, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddBool(pCfg, "useAdapter", tsUseAdapter, CFG_SCOPE_CLIENT) != 0) return -1; + if (cfgAddBool(pCfg, "crashReporting", tsEnableCrashReport, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt64(pCfg, "queryMaxConcurrentTables", tsQueryMaxConcurrentTables, INT64_MIN, INT64_MAX, CFG_SCOPE_CLIENT) != 0) return -1; + if (cfgAddInt32(pCfg, "metaCacheMaxSize", tsMetaCacheMaxSize, -1, INT32_MAX, CFG_SCOPE_CLIENT) != 0) return -1; + if (cfgAddInt32(pCfg, "slowLogThreshold", tsSlowLogThreshold, 0, INT32_MAX, CFG_SCOPE_CLIENT) != 0) return -1; + if (cfgAddString(pCfg, "slowLogScope", "", CFG_SCOPE_CLIENT) != 0) return -1; tsNumOfRpcThreads = tsNumOfCores / 2; tsNumOfRpcThreads = TRANGE(tsNumOfRpcThreads, 2, TSDB_MAX_RPC_THREADS); - if (cfgAddInt32(pCfg, "numOfRpcThreads", tsNumOfRpcThreads, 1, 1024, 0) != 0) return -1; + if (cfgAddInt32(pCfg, "numOfRpcThreads", tsNumOfRpcThreads, 1, 1024, CFG_SCOPE_BOTH) != 0) return -1; tsNumOfRpcSessions = TRANGE(tsNumOfRpcSessions, 100, 100000); - if (cfgAddInt32(pCfg, "numOfRpcSessions", tsNumOfRpcSessions, 1, 100000, 0) != 0) return -1; + if (cfgAddInt32(pCfg, "numOfRpcSessions", tsNumOfRpcSessions, 1, 100000, CFG_SCOPE_BOTH) != 0) return -1; tsTimeToGetAvailableConn = TRANGE(tsTimeToGetAvailableConn, 20, 10000000); - if (cfgAddInt32(pCfg, "timeToGetAvailableConn", tsTimeToGetAvailableConn, 20, 1000000, 0) != 0) return -1; + if (cfgAddInt32(pCfg, "timeToGetAvailableConn", tsTimeToGetAvailableConn, 20, 1000000, CFG_SCOPE_BOTH) != 0) return -1; tsNumOfTaskQueueThreads = tsNumOfCores / 2; tsNumOfTaskQueueThreads = TMAX(tsNumOfTaskQueueThreads, 4); if (tsNumOfTaskQueueThreads >= 10) { tsNumOfTaskQueueThreads = 10; } - if (cfgAddInt32(pCfg, "numOfTaskQueueThreads", tsNumOfTaskQueueThreads, 4, 1024, 0) != 0) return -1; + if (cfgAddInt32(pCfg, "numOfTaskQueueThreads", tsNumOfTaskQueueThreads, 4, 1024, CFG_SCOPE_CLIENT) != 0) return -1; return 0; } @@ 
-395,92 +404,92 @@ static int32_t taosAddClientCfg(SConfig *pCfg) { static int32_t taosAddSystemCfg(SConfig *pCfg) { SysNameInfo info = taosGetSysNameInfo(); - if (cfgAddTimezone(pCfg, "timezone", tsTimezoneStr) != 0) return -1; - if (cfgAddLocale(pCfg, "locale", tsLocale) != 0) return -1; - if (cfgAddCharset(pCfg, "charset", tsCharset) != 0) return -1; - if (cfgAddBool(pCfg, "assert", 1, 1) != 0) return -1; - if (cfgAddBool(pCfg, "enableCoreFile", 1, 1) != 0) return -1; - if (cfgAddFloat(pCfg, "numOfCores", tsNumOfCores, 1, 100000, 1) != 0) return -1; - - if (cfgAddBool(pCfg, "SSE42", tsSSE42Enable, 0) != 0) return -1; - if (cfgAddBool(pCfg, "AVX", tsAVXEnable, 0) != 0) return -1; - if (cfgAddBool(pCfg, "AVX2", tsAVX2Enable, 0) != 0) return -1; - if (cfgAddBool(pCfg, "FMA", tsFMAEnable, 0) != 0) return -1; - if (cfgAddBool(pCfg, "SIMD-builtins", tsSIMDBuiltins, 0) != 0) return -1; - if (cfgAddBool(pCfg, "tagFilterCache", tsTagFilterCache, 0) != 0) return -1; - - if (cfgAddInt64(pCfg, "openMax", tsOpenMax, 0, INT64_MAX, 1) != 0) return -1; + if (cfgAddTimezone(pCfg, "timezone", tsTimezoneStr, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddLocale(pCfg, "locale", tsLocale, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddCharset(pCfg, "charset", tsCharset, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddBool(pCfg, "assert", 1, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddBool(pCfg, "enableCoreFile", 1, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddFloat(pCfg, "numOfCores", tsNumOfCores, 1, 100000, CFG_SCOPE_BOTH) != 0) return -1; + + if (cfgAddBool(pCfg, "SSE42", tsSSE42Enable, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddBool(pCfg, "AVX", tsAVXEnable, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddBool(pCfg, "AVX2", tsAVX2Enable, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddBool(pCfg, "FMA", tsFMAEnable, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddBool(pCfg, "SIMD-builtins", tsSIMDBuiltins, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddBool(pCfg, "tagFilterCache", 
tsTagFilterCache, CFG_SCOPE_BOTH) != 0) return -1; + + if (cfgAddInt64(pCfg, "openMax", tsOpenMax, 0, INT64_MAX, CFG_SCOPE_BOTH) != 0) return -1; #if !defined(_ALPINE) - if (cfgAddInt64(pCfg, "streamMax", tsStreamMax, 0, INT64_MAX, 1) != 0) return -1; + if (cfgAddInt64(pCfg, "streamMax", tsStreamMax, 0, INT64_MAX, CFG_SCOPE_BOTH) != 0) return -1; #endif - if (cfgAddInt32(pCfg, "pageSizeKB", tsPageSizeKB, 0, INT64_MAX, 1) != 0) return -1; - if (cfgAddInt64(pCfg, "totalMemoryKB", tsTotalMemoryKB, 0, INT64_MAX, 1) != 0) return -1; - if (cfgAddString(pCfg, "os sysname", info.sysname, 1) != 0) return -1; - if (cfgAddString(pCfg, "os nodename", info.nodename, 1) != 0) return -1; - if (cfgAddString(pCfg, "os release", info.release, 1) != 0) return -1; - if (cfgAddString(pCfg, "os version", info.version, 1) != 0) return -1; - if (cfgAddString(pCfg, "os machine", info.machine, 1) != 0) return -1; - - if (cfgAddString(pCfg, "version", version, 1) != 0) return -1; - if (cfgAddString(pCfg, "compatible_version", compatible_version, 1) != 0) return -1; - if (cfgAddString(pCfg, "gitinfo", gitinfo, 1) != 0) return -1; - if (cfgAddString(pCfg, "buildinfo", buildinfo, 1) != 0) return -1; + if (cfgAddInt32(pCfg, "pageSizeKB", tsPageSizeKB, 0, INT64_MAX, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddInt64(pCfg, "totalMemoryKB", tsTotalMemoryKB, 0, INT64_MAX, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddString(pCfg, "os sysname", info.sysname, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddString(pCfg, "os nodename", info.nodename, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddString(pCfg, "os release", info.release, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddString(pCfg, "os version", info.version, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddString(pCfg, "os machine", info.machine, CFG_SCOPE_BOTH) != 0) return -1; + + if (cfgAddString(pCfg, "version", version, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddString(pCfg, "compatible_version", compatible_version, CFG_SCOPE_BOTH) != 0) 
return -1; + if (cfgAddString(pCfg, "gitinfo", gitinfo, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddString(pCfg, "buildinfo", buildinfo, CFG_SCOPE_BOTH) != 0) return -1; return 0; } static int32_t taosAddServerCfg(SConfig *pCfg) { - if (cfgAddDir(pCfg, "dataDir", tsDataDir, 0) != 0) return -1; - if (cfgAddFloat(pCfg, "minimalDataDirGB", 2.0f, 0.001f, 10000000, 0) != 0) return -1; + if (cfgAddDir(pCfg, "dataDir", tsDataDir, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddFloat(pCfg, "minimalDataDirGB", 2.0f, 0.001f, 10000000, CFG_SCOPE_SERVER) != 0) return -1; tsNumOfSupportVnodes = tsNumOfCores * 2; tsNumOfSupportVnodes = TMAX(tsNumOfSupportVnodes, 2); - if (cfgAddInt32(pCfg, "supportVnodes", tsNumOfSupportVnodes, 0, 4096, 0) != 0) return -1; - - if (cfgAddInt32(pCfg, "maxShellConns", tsMaxShellConns, 10, 50000000, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "statusInterval", tsStatusInterval, 1, 30, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "minSlidingTime", tsMinSlidingTime, 1, 1000000, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "minIntervalTime", tsMinIntervalTime, 1, 1000000, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "maxNumOfDistinctRes", tsMaxNumOfDistinctResults, 10 * 10000, 10000 * 10000, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "countAlwaysReturnValue", tsCountAlwaysReturnValue, 0, 1, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "queryBufferSize", tsQueryBufferSize, -1, 500000000000, 0) != 0) return -1; - if (cfgAddBool(pCfg, "printAuth", tsPrintAuth, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "queryRspPolicy", tsQueryRspPolicy, 0, 1, 0) != 0) return -1; + if (cfgAddInt32(pCfg, "supportVnodes", tsNumOfSupportVnodes, 0, 4096, CFG_SCOPE_SERVER) != 0) return -1; + + if (cfgAddInt32(pCfg, "maxShellConns", tsMaxShellConns, 10, 50000000, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "statusInterval", tsStatusInterval, 1, 30, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "minSlidingTime", tsMinSlidingTime, 1, 1000000, 
CFG_SCOPE_CLIENT) != 0) return -1; + if (cfgAddInt32(pCfg, "minIntervalTime", tsMinIntervalTime, 1, 1000000, CFG_SCOPE_CLIENT) != 0) return -1; + if (cfgAddInt32(pCfg, "maxNumOfDistinctRes", tsMaxNumOfDistinctResults, 10 * 10000, 10000 * 10000, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "countAlwaysReturnValue", tsCountAlwaysReturnValue, 0, 1, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddInt32(pCfg, "queryBufferSize", tsQueryBufferSize, -1, 500000000000, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddBool(pCfg, "printAuth", tsPrintAuth, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "queryRspPolicy", tsQueryRspPolicy, 0, 1, CFG_SCOPE_SERVER) != 0) return -1; tsNumOfRpcThreads = tsNumOfCores / 2; tsNumOfRpcThreads = TRANGE(tsNumOfRpcThreads, 2, TSDB_MAX_RPC_THREADS); - if (cfgAddInt32(pCfg, "numOfRpcThreads", tsNumOfRpcThreads, 1, 1024, 0) != 0) return -1; + if (cfgAddInt32(pCfg, "numOfRpcThreads", tsNumOfRpcThreads, 1, 1024, CFG_SCOPE_BOTH) != 0) return -1; tsNumOfRpcSessions = TRANGE(tsNumOfRpcSessions, 100, 10000); - if (cfgAddInt32(pCfg, "numOfRpcSessions", tsNumOfRpcSessions, 1, 100000, 0) != 0) return -1; + if (cfgAddInt32(pCfg, "numOfRpcSessions", tsNumOfRpcSessions, 1, 100000, CFG_SCOPE_BOTH) != 0) return -1; tsTimeToGetAvailableConn = TRANGE(tsTimeToGetAvailableConn, 20, 1000000); - if (cfgAddInt32(pCfg, "timeToGetAvailableConn", tsNumOfRpcSessions, 20, 1000000, 0) != 0) return -1; + if (cfgAddInt32(pCfg, "timeToGetAvailableConn", tsNumOfRpcSessions, 20, 1000000, CFG_SCOPE_BOTH) != 0) return -1; tsNumOfCommitThreads = tsNumOfCores / 2; tsNumOfCommitThreads = TRANGE(tsNumOfCommitThreads, 2, 4); - if (cfgAddInt32(pCfg, "numOfCommitThreads", tsNumOfCommitThreads, 1, 1024, 0) != 0) return -1; + if (cfgAddInt32(pCfg, "numOfCommitThreads", tsNumOfCommitThreads, 1, 1024, CFG_SCOPE_SERVER) != 0) return -1; tsNumOfMnodeReadThreads = tsNumOfCores / 8; tsNumOfMnodeReadThreads = TRANGE(tsNumOfMnodeReadThreads, 1, 4); - if (cfgAddInt32(pCfg, 
"numOfMnodeReadThreads", tsNumOfMnodeReadThreads, 1, 1024, 0) != 0) return -1; + if (cfgAddInt32(pCfg, "numOfMnodeReadThreads", tsNumOfMnodeReadThreads, 1, 1024, CFG_SCOPE_SERVER) != 0) return -1; tsNumOfVnodeQueryThreads = tsNumOfCores * 2; tsNumOfVnodeQueryThreads = TMAX(tsNumOfVnodeQueryThreads, 4); - if (cfgAddInt32(pCfg, "numOfVnodeQueryThreads", tsNumOfVnodeQueryThreads, 4, 1024, 0) != 0) return -1; + if (cfgAddInt32(pCfg, "numOfVnodeQueryThreads", tsNumOfVnodeQueryThreads, 4, 1024, CFG_SCOPE_SERVER) != 0) return -1; - if (cfgAddFloat(pCfg, "ratioOfVnodeStreamThreads", tsRatioOfVnodeStreamThreads, 0.01, 100, 0) != 0) return -1; + if (cfgAddFloat(pCfg, "ratioOfVnodeStreamThreads", tsRatioOfVnodeStreamThreads, 0.01, 100, CFG_SCOPE_SERVER) != 0) return -1; tsNumOfVnodeFetchThreads = tsNumOfCores / 4; tsNumOfVnodeFetchThreads = TMAX(tsNumOfVnodeFetchThreads, 4); - if (cfgAddInt32(pCfg, "numOfVnodeFetchThreads", tsNumOfVnodeFetchThreads, 4, 1024, 0) != 0) return -1; + if (cfgAddInt32(pCfg, "numOfVnodeFetchThreads", tsNumOfVnodeFetchThreads, 4, 1024, CFG_SCOPE_SERVER) != 0) return -1; tsNumOfVnodeRsmaThreads = tsNumOfCores; tsNumOfVnodeRsmaThreads = TMAX(tsNumOfVnodeRsmaThreads, 4); - if (cfgAddInt32(pCfg, "numOfVnodeRsmaThreads", tsNumOfVnodeRsmaThreads, 1, 1024, 0) != 0) return -1; + if (cfgAddInt32(pCfg, "numOfVnodeRsmaThreads", tsNumOfVnodeRsmaThreads, 1, 1024, CFG_SCOPE_SERVER) != 0) return -1; tsNumOfQnodeQueryThreads = tsNumOfCores * 2; tsNumOfQnodeQueryThreads = TMAX(tsNumOfQnodeQueryThreads, 4); - if (cfgAddInt32(pCfg, "numOfQnodeQueryThreads", tsNumOfQnodeQueryThreads, 4, 1024, 0) != 0) return -1; + if (cfgAddInt32(pCfg, "numOfQnodeQueryThreads", tsNumOfQnodeQueryThreads, 4, 1024, CFG_SCOPE_SERVER) != 0) return -1; // tsNumOfQnodeFetchThreads = tsNumOfCores / 2; // tsNumOfQnodeFetchThreads = TMAX(tsNumOfQnodeFetchThreads, 4); @@ -488,67 +497,68 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { tsNumOfSnodeStreamThreads = tsNumOfCores / 4; 
tsNumOfSnodeStreamThreads = TRANGE(tsNumOfSnodeStreamThreads, 2, 4); - if (cfgAddInt32(pCfg, "numOfSnodeSharedThreads", tsNumOfSnodeStreamThreads, 2, 1024, 0) != 0) return -1; + if (cfgAddInt32(pCfg, "numOfSnodeSharedThreads", tsNumOfSnodeStreamThreads, 2, 1024, CFG_SCOPE_SERVER) != 0) return -1; tsNumOfSnodeWriteThreads = tsNumOfCores / 4; tsNumOfSnodeWriteThreads = TRANGE(tsNumOfSnodeWriteThreads, 2, 4); - if (cfgAddInt32(pCfg, "numOfSnodeUniqueThreads", tsNumOfSnodeWriteThreads, 2, 1024, 0) != 0) return -1; + if (cfgAddInt32(pCfg, "numOfSnodeUniqueThreads", tsNumOfSnodeWriteThreads, 2, 1024, CFG_SCOPE_SERVER) != 0) return -1; tsRpcQueueMemoryAllowed = tsTotalMemoryKB * 1024 * 0.1; tsRpcQueueMemoryAllowed = TRANGE(tsRpcQueueMemoryAllowed, TSDB_MAX_MSG_SIZE * 10LL, TSDB_MAX_MSG_SIZE * 10000LL); - if (cfgAddInt64(pCfg, "rpcQueueMemoryAllowed", tsRpcQueueMemoryAllowed, TSDB_MAX_MSG_SIZE * 10L, INT64_MAX, 0) != 0) + if (cfgAddInt64(pCfg, "rpcQueueMemoryAllowed", tsRpcQueueMemoryAllowed, TSDB_MAX_MSG_SIZE * 10L, INT64_MAX, CFG_SCOPE_BOTH) != 0) return -1; - if (cfgAddInt32(pCfg, "syncElectInterval", tsElectInterval, 10, 1000 * 60 * 24 * 2, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "syncHeartbeatInterval", tsHeartbeatInterval, 10, 1000 * 60 * 24 * 2, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "syncHeartbeatTimeout", tsHeartbeatTimeout, 10, 1000 * 60 * 24 * 2, 0) != 0) return -1; - - if (cfgAddInt64(pCfg, "vndCommitMaxInterval", tsVndCommitMaxIntervalMs, 1000, 1000 * 60 * 60, 0) != 0) return -1; - - if (cfgAddInt64(pCfg, "mndSdbWriteDelta", tsMndSdbWriteDelta, 20, 10000, 0) != 0) return -1; - if (cfgAddInt64(pCfg, "mndLogRetention", tsMndLogRetention, 500, 10000, 0) != 0) return -1; - if (cfgAddBool(pCfg, "skipGrant", tsMndSkipGrant, 0) != 0) return -1; - - if (cfgAddBool(pCfg, "monitor", tsEnableMonitor, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "monitorInterval", tsMonitorInterval, 1, 200000, 0) != 0) return -1; - if (cfgAddString(pCfg, "monitorFqdn", 
tsMonitorFqdn, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "monitorPort", tsMonitorPort, 1, 65056, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "monitorMaxLogs", tsMonitorMaxLogs, 1, 1000000, 0) != 0) return -1; - if (cfgAddBool(pCfg, "monitorComp", tsMonitorComp, 0) != 0) return -1; - - if (cfgAddBool(pCfg, "crashReporting", tsEnableCrashReport, 0) != 0) return -1; - if (cfgAddBool(pCfg, "telemetryReporting", tsEnableTelem, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "telemetryInterval", tsTelemInterval, 1, 200000, 0) != 0) return -1; - if (cfgAddString(pCfg, "telemetryServer", tsTelemServer, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "telemetryPort", tsTelemPort, 1, 65056, 0) != 0) return -1; - - if (cfgAddInt32(pCfg, "tmqMaxTopicNum", tmqMaxTopicNum, 1, 10000, 1) != 0) return -1; - - if (cfgAddInt32(pCfg, "transPullupInterval", tsTransPullupInterval, 1, 10000, 1) != 0) return -1; - if (cfgAddInt32(pCfg, "mqRebalanceInterval", tsMqRebalanceInterval, 1, 10000, 1) != 0) return -1; - if (cfgAddInt32(pCfg, "ttlUnit", tsTtlUnit, 1, 86400 * 365, 1) != 0) return -1; - if (cfgAddInt32(pCfg, "ttlPushInterval", tsTtlPushInterval, 1, 100000, 1) != 0) return -1; - if (cfgAddBool(pCfg, "ttlChangeOnWrite", tsTtlChangeOnWrite, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "uptimeInterval", tsUptimeInterval, 1, 100000, 1) != 0) return -1; - if (cfgAddInt32(pCfg, "queryRsmaTolerance", tsQueryRsmaTolerance, 0, 900000, 0) != 0) return -1; - - if (cfgAddInt64(pCfg, "walFsyncDataSizeLimit", tsWalFsyncDataSizeLimit, 100 * 1024 * 1024, INT64_MAX, 0) != 0) + if (cfgAddInt32(pCfg, "syncElectInterval", tsElectInterval, 10, 1000 * 60 * 24 * 2, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "syncHeartbeatInterval", tsHeartbeatInterval, 10, 1000 * 60 * 24 * 2, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "syncHeartbeatTimeout", tsHeartbeatTimeout, 10, 1000 * 60 * 24 * 2, CFG_SCOPE_SERVER) != 0) return -1; + + if (cfgAddInt64(pCfg, "vndCommitMaxInterval", 
tsVndCommitMaxIntervalMs, 1000, 1000 * 60 * 60, CFG_SCOPE_SERVER) != 0) return -1; + + if (cfgAddInt64(pCfg, "mndSdbWriteDelta", tsMndSdbWriteDelta, 20, 10000, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt64(pCfg, "mndLogRetention", tsMndLogRetention, 500, 10000, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "grantMode", tsMndGrantMode, 0, 10000, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddBool(pCfg, "skipGrant", tsMndSkipGrant, CFG_SCOPE_SERVER) != 0) return -1; + + if (cfgAddBool(pCfg, "monitor", tsEnableMonitor, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "monitorInterval", tsMonitorInterval, 1, 200000, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddString(pCfg, "monitorFqdn", tsMonitorFqdn, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "monitorPort", tsMonitorPort, 1, 65056, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "monitorMaxLogs", tsMonitorMaxLogs, 1, 1000000, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddBool(pCfg, "monitorComp", tsMonitorComp, CFG_SCOPE_SERVER) != 0) return -1; + + if (cfgAddBool(pCfg, "crashReporting", tsEnableCrashReport, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddBool(pCfg, "telemetryReporting", tsEnableTelem, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddInt32(pCfg, "telemetryInterval", tsTelemInterval, 1, 200000, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddString(pCfg, "telemetryServer", tsTelemServer, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddInt32(pCfg, "telemetryPort", tsTelemPort, 1, 65056, CFG_SCOPE_BOTH) != 0) return -1; + + if (cfgAddInt32(pCfg, "tmqMaxTopicNum", tmqMaxTopicNum, 1, 10000, CFG_SCOPE_SERVER) != 0) return -1; + + if (cfgAddInt32(pCfg, "transPullupInterval", tsTransPullupInterval, 1, 10000, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "mqRebalanceInterval", tsMqRebalanceInterval, 1, 10000, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "ttlUnit", tsTtlUnit, 1, 86400 * 365, CFG_SCOPE_SERVER) != 0) return -1; + if 
(cfgAddInt32(pCfg, "ttlPushInterval", tsTtlPushInterval, 1, 100000, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddBool(pCfg, "ttlChangeOnWrite", tsTtlChangeOnWrite, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "uptimeInterval", tsUptimeInterval, 1, 100000, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "queryRsmaTolerance", tsQueryRsmaTolerance, 0, 900000, CFG_SCOPE_SERVER) != 0) return -1; + + if (cfgAddInt64(pCfg, "walFsyncDataSizeLimit", tsWalFsyncDataSizeLimit, 100 * 1024 * 1024, INT64_MAX, CFG_SCOPE_SERVER) != 0) return -1; - if (cfgAddBool(pCfg, "udf", tsStartUdfd, 0) != 0) return -1; - if (cfgAddString(pCfg, "udfdResFuncs", tsUdfdResFuncs, 0) != 0) return -1; - if (cfgAddString(pCfg, "udfdLdLibPath", tsUdfdLdLibPath, 0) != 0) return -1; + if (cfgAddBool(pCfg, "udf", tsStartUdfd, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddString(pCfg, "udfdResFuncs", tsUdfdResFuncs, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddString(pCfg, "udfdLdLibPath", tsUdfdLdLibPath, CFG_SCOPE_SERVER) != 0) return -1; - if (cfgAddBool(pCfg, "disableStream", tsDisableStream, 0) != 0) return -1; - if (cfgAddInt64(pCfg, "streamBufferSize", tsStreamBufferSize, 0, INT64_MAX, 0) != 0) return -1; - if (cfgAddInt64(pCfg, "checkpointInterval", tsCheckpointInterval, 0, INT64_MAX, 0) != 0) return -1; + if (cfgAddBool(pCfg, "disableStream", tsDisableStream, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt64(pCfg, "streamBufferSize", tsStreamBufferSize, 0, INT64_MAX, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt64(pCfg, "checkpointInterval", tsCheckpointInterval, 0, INT64_MAX, CFG_SCOPE_SERVER) != 0) return -1; - if (cfgAddInt32(pCfg, "cacheLazyLoadThreshold", tsCacheLazyLoadThreshold, 0, 100000, 0) != 0) return -1; + if (cfgAddInt32(pCfg, "cacheLazyLoadThreshold", tsCacheLazyLoadThreshold, 0, 100000, CFG_SCOPE_SERVER) != 0) return -1; - if (cfgAddBool(pCfg, "filterScalarMode", tsFilterScalarMode, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "keepTimeOffset", 
tsKeepTimeOffset, 0, 23, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "maxStreamBackendCache", tsMaxStreamBackendCache, 16, 1024, 0) != 0) return -1; - if (cfgAddInt32(pCfg, "pqSortMemThreshold", tsPQSortMemThreshold, 1, 10240, 0) != 0) return -1; + if (cfgAddBool(pCfg, "filterScalarMode", tsFilterScalarMode, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "keepTimeOffset", tsKeepTimeOffset, 0, 23, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "maxStreamBackendCache", tsMaxStreamBackendCache, 16, 1024, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "pqSortMemThreshold", tsPQSortMemThreshold, 1, 10240, CFG_SCOPE_SERVER) != 0) return -1; GRANT_CFG_ADD; return 0; @@ -795,6 +805,8 @@ static int32_t taosSetClientCfg(SConfig *pCfg) { tstrncpy(tsSmlChildTableName, cfgGetItem(pCfg, "smlChildTableName")->str, TSDB_TABLE_NAME_LEN); tstrncpy(tsSmlTagName, cfgGetItem(pCfg, "smlTagName")->str, TSDB_COL_NAME_LEN); + tstrncpy(tsSmlTsDefaultName, cfgGetItem(pCfg, "smlTsDefaultName")->str, TSDB_COL_NAME_LEN); + tsSmlDot2Underline = cfgGetItem(pCfg, "smlDot2Underline")->bval; // tsSmlDataFormat = cfgGetItem(pCfg, "smlDataFormat")->bval; // tsSmlBatchSize = cfgGetItem(pCfg, "smlBatchSize")->i32; @@ -916,6 +928,7 @@ static int32_t taosSetServerCfg(SConfig *pCfg) { tsMndSdbWriteDelta = cfgGetItem(pCfg, "mndSdbWriteDelta")->i64; tsMndLogRetention = cfgGetItem(pCfg, "mndLogRetention")->i64; tsMndSkipGrant = cfgGetItem(pCfg, "skipGrant")->bval; + tsMndGrantMode = cfgGetItem(pCfg, "grantMode")->i32; tsStartUdfd = cfgGetItem(pCfg, "udf")->bval; tstrncpy(tsUdfdResFuncs, cfgGetItem(pCfg, "udfdResFuncs")->str, sizeof(tsUdfdResFuncs)); @@ -1236,6 +1249,10 @@ int32_t taosApplyLocalCfg(SConfig *pCfg, char *name) { // tsSmlDataFormat = cfgGetItem(pCfg, "smlDataFormat")->bval; // } else if (strcasecmp("smlBatchSize", name) == 0) { // tsSmlBatchSize = cfgGetItem(pCfg, "smlBatchSize")->i32; + } else if(strcasecmp("smlTsDefaultName", name) == 0) { + 
tstrncpy(tsSmlTsDefaultName, cfgGetItem(pCfg, "smlTsDefaultName")->str, TSDB_COL_NAME_LEN); + } else if(strcasecmp("smlDot2Underline", name) == 0) { + tsSmlDot2Underline = cfgGetItem(pCfg, "smlDot2Underline")->bval; } else if (strcasecmp("shellActivityTimer", name) == 0) { tsShellActivityTimer = cfgGetItem(pCfg, "shellActivityTimer")->i32; } else if (strcasecmp("supportVnodes", name) == 0) { diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index 835bb5553be8cebde063e48a9a32556b15b75910..ba65fba4c654cfd405535dcc755bb8b9a77058f2 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -1101,6 +1101,9 @@ int32_t tSerializeSStatusReq(void *buf, int32_t bufLen, SStatusReq *pReq) { if (tEncodeI64(&encoder, pReq->qload.timeInFetchQueue) < 0) return -1; if (tEncodeI32(&encoder, pReq->statusSeq) < 0) return -1; + if (tEncodeI64(&encoder, pReq->mload.syncTerm) < 0) return -1; + if (tEncodeI64(&encoder, pReq->mload.roleTimeMs) < 0) return -1; + if (tEncodeI8(&encoder, pReq->clusterCfg.ttlChangeOnWrite) < 0) return -1; tEndEncode(&encoder); int32_t tlen = encoder.pos; @@ -1157,7 +1160,7 @@ int32_t tDeserializeSStatusReq(void *buf, int32_t bufLen, SStatusReq *pReq) { if (tDecodeI64(&decoder, &vload.compStorage) < 0) return -1; if (tDecodeI64(&decoder, &vload.pointsWritten) < 0) return -1; if (tDecodeI32(&decoder, &vload.numOfCachedTables) < 0) return -1; - if (tDecodeI32(&decoder, &vload.learnerProgress) < 0) return -1; + if (tDecodeI32(&decoder, &vload.learnerProgress) < 0) return -1; if (tDecodeI64(&decoder, &reserved) < 0) return -1; if (tDecodeI64(&decoder, &reserved) < 0) return -1; if (taosArrayPush(pReq->pVloads, &vload) == NULL) { @@ -1183,6 +1186,19 @@ int32_t tDeserializeSStatusReq(void *buf, int32_t bufLen, SStatusReq *pReq) { if (tDecodeI64(&decoder, &pReq->qload.timeInFetchQueue) < 0) return -1; if (tDecodeI32(&decoder, &pReq->statusSeq) < 0) return -1; + + pReq->mload.syncTerm = -1; + pReq->mload.roleTimeMs = 0; + if
(!tDecodeIsEnd(&decoder)) { + if (tDecodeI64(&decoder, &pReq->mload.syncTerm) < 0) return -1; + if (tDecodeI64(&decoder, &pReq->mload.roleTimeMs) < 0) return -1; + } + + pReq->clusterCfg.ttlChangeOnWrite = false; + if (!tDecodeIsEnd(&decoder)) { + if (tDecodeI8(&decoder, &pReq->clusterCfg.ttlChangeOnWrite) < 0) return -1; + } + tEndDecode(&decoder); tDecoderClear(&decoder); return 0; @@ -1547,6 +1563,7 @@ int32_t tSerializeSGetUserAuthRsp(void *buf, int32_t bufLen, SGetUserAuthRsp *pR } int32_t tDeserializeSGetUserAuthRspImpl(SDecoder *pDecoder, SGetUserAuthRsp *pRsp) { + char *key = NULL, *value = NULL; pRsp->createdDbs = taosHashInit(4, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); pRsp->readDbs = taosHashInit(4, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); pRsp->writeDbs = taosHashInit(4, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); @@ -1555,40 +1572,40 @@ int32_t tDeserializeSGetUserAuthRspImpl(SDecoder *pDecoder, SGetUserAuthRsp *pRs pRsp->useDbs = taosHashInit(4, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); if (pRsp->createdDbs == NULL || pRsp->readDbs == NULL || pRsp->writeDbs == NULL || pRsp->readTbs == NULL || pRsp->writeTbs == NULL || pRsp->useDbs == NULL) { - return -1; + goto _err; } - if (tDecodeCStrTo(pDecoder, pRsp->user) < 0) return -1; - if (tDecodeI8(pDecoder, &pRsp->superAuth) < 0) return -1; - if (tDecodeI8(pDecoder, &pRsp->sysInfo) < 0) return -1; - if (tDecodeI8(pDecoder, &pRsp->enable) < 0) return -1; - if (tDecodeI8(pDecoder, &pRsp->reserve) < 0) return -1; - if (tDecodeI32(pDecoder, &pRsp->version) < 0) return -1; + if (tDecodeCStrTo(pDecoder, pRsp->user) < 0) goto _err; + if (tDecodeI8(pDecoder, &pRsp->superAuth) < 0) goto _err; + if (tDecodeI8(pDecoder, &pRsp->sysInfo) < 0) goto _err; + if (tDecodeI8(pDecoder, &pRsp->enable) < 0) goto _err; + if (tDecodeI8(pDecoder, &pRsp->reserve) < 0) goto _err; + if 
(tDecodeI32(pDecoder, &pRsp->version) < 0) goto _err; int32_t numOfCreatedDbs = 0; int32_t numOfReadDbs = 0; int32_t numOfWriteDbs = 0; - if (tDecodeI32(pDecoder, &numOfCreatedDbs) < 0) return -1; - if (tDecodeI32(pDecoder, &numOfReadDbs) < 0) return -1; - if (tDecodeI32(pDecoder, &numOfWriteDbs) < 0) return -1; + if (tDecodeI32(pDecoder, &numOfCreatedDbs) < 0) goto _err; + if (tDecodeI32(pDecoder, &numOfReadDbs) < 0) goto _err; + if (tDecodeI32(pDecoder, &numOfWriteDbs) < 0) goto _err; for (int32_t i = 0; i < numOfCreatedDbs; ++i) { char db[TSDB_DB_FNAME_LEN] = {0}; - if (tDecodeCStrTo(pDecoder, db) < 0) return -1; + if (tDecodeCStrTo(pDecoder, db) < 0) goto _err; int32_t len = strlen(db); taosHashPut(pRsp->createdDbs, db, len, db, len + 1); } for (int32_t i = 0; i < numOfReadDbs; ++i) { char db[TSDB_DB_FNAME_LEN] = {0}; - if (tDecodeCStrTo(pDecoder, db) < 0) return -1; + if (tDecodeCStrTo(pDecoder, db) < 0) goto _err; int32_t len = strlen(db); taosHashPut(pRsp->readDbs, db, len, db, len + 1); } for (int32_t i = 0; i < numOfWriteDbs; ++i) { char db[TSDB_DB_FNAME_LEN] = {0}; - if (tDecodeCStrTo(pDecoder, db) < 0) return -1; + if (tDecodeCStrTo(pDecoder, db) < 0) goto _err; int32_t len = strlen(db); taosHashPut(pRsp->writeDbs, db, len, db, len + 1); } @@ -1597,67 +1614,80 @@ int32_t tDeserializeSGetUserAuthRspImpl(SDecoder *pDecoder, SGetUserAuthRsp *pRs int32_t numOfReadTbs = 0; int32_t numOfWriteTbs = 0; int32_t numOfUseDbs = 0; - if (tDecodeI32(pDecoder, &numOfReadTbs) < 0) return -1; - if (tDecodeI32(pDecoder, &numOfWriteTbs) < 0) return -1; - if (tDecodeI32(pDecoder, &numOfUseDbs) < 0) return -1; + if (tDecodeI32(pDecoder, &numOfReadTbs) < 0) goto _err; + if (tDecodeI32(pDecoder, &numOfWriteTbs) < 0) goto _err; + if (tDecodeI32(pDecoder, &numOfUseDbs) < 0) goto _err; for (int32_t i = 0; i < numOfReadTbs; ++i) { int32_t keyLen = 0; - if (tDecodeI32(pDecoder, &keyLen) < 0) return -1; + if (tDecodeI32(pDecoder, &keyLen) < 0) goto _err; - char *key = 
taosMemoryCalloc(keyLen + 1, sizeof(char)); - if (tDecodeCStrTo(pDecoder, key) < 0) return -1; + key = taosMemoryCalloc(keyLen + 1, sizeof(char)); + if (tDecodeCStrTo(pDecoder, key) < 0) goto _err; int32_t valuelen = 0; - if (tDecodeI32(pDecoder, &valuelen) < 0) return -1; - char *value = taosMemoryCalloc(valuelen + 1, sizeof(char)); - if (tDecodeCStrTo(pDecoder, value) < 0) return -1; + if (tDecodeI32(pDecoder, &valuelen) < 0) goto _err; + + value = taosMemoryCalloc(valuelen + 1, sizeof(char)); + if (tDecodeCStrTo(pDecoder, value) < 0) goto _err; taosHashPut(pRsp->readTbs, key, strlen(key), value, valuelen + 1); - taosMemoryFree(key); - taosMemoryFree(value); + taosMemoryFreeClear(key); + taosMemoryFreeClear(value); } for (int32_t i = 0; i < numOfWriteTbs; ++i) { int32_t keyLen = 0; - if (tDecodeI32(pDecoder, &keyLen) < 0) return -1; + if (tDecodeI32(pDecoder, &keyLen) < 0) goto _err; - char *key = taosMemoryCalloc(keyLen + 1, sizeof(char)); - if (tDecodeCStrTo(pDecoder, key) < 0) return -1; + key = taosMemoryCalloc(keyLen + 1, sizeof(char)); + if (tDecodeCStrTo(pDecoder, key) < 0) goto _err; int32_t valuelen = 0; - if (tDecodeI32(pDecoder, &valuelen) < 0) return -1; - char *value = taosMemoryCalloc(valuelen + 1, sizeof(char)); - if (tDecodeCStrTo(pDecoder, value) < 0) return -1; + if (tDecodeI32(pDecoder, &valuelen) < 0) goto _err; + + value = taosMemoryCalloc(valuelen + 1, sizeof(char)); + if (tDecodeCStrTo(pDecoder, value) < 0) goto _err; taosHashPut(pRsp->writeTbs, key, strlen(key), value, valuelen + 1); - taosMemoryFree(key); - taosMemoryFree(value); + taosMemoryFreeClear(key); + taosMemoryFreeClear(value); } for (int32_t i = 0; i < numOfUseDbs; ++i) { int32_t keyLen = 0; - if (tDecodeI32(pDecoder, &keyLen) < 0) return -1; + if (tDecodeI32(pDecoder, &keyLen) < 0) goto _err; - char *key = taosMemoryCalloc(keyLen + 1, sizeof(char)); - if (tDecodeCStrTo(pDecoder, key) < 0) return -1; + key = taosMemoryCalloc(keyLen + 1, sizeof(char)); + if 
(tDecodeCStrTo(pDecoder, key) < 0) goto _err; int32_t ref = 0; - if (tDecodeI32(pDecoder, &ref) < 0) return -1; + if (tDecodeI32(pDecoder, &ref) < 0) goto _err; + taosHashPut(pRsp->useDbs, key, strlen(key), &ref, sizeof(ref)); - taosMemoryFree(key); + taosMemoryFreeClear(key); } // since 3.0.7.0 if (!tDecodeIsEnd(pDecoder)) { - if (tDecodeI32(pDecoder, &pRsp->passVer) < 0) return -1; + if (tDecodeI32(pDecoder, &pRsp->passVer) < 0) goto _err; } else { pRsp->passVer = 0; } } - return 0; +_err: + taosHashCleanup(pRsp->createdDbs); + taosHashCleanup(pRsp->readDbs); + taosHashCleanup(pRsp->writeDbs); + taosHashCleanup(pRsp->writeTbs); + taosHashCleanup(pRsp->readTbs); + taosHashCleanup(pRsp->useDbs); + + taosMemoryFreeClear(key); + taosMemoryFreeClear(value); + return -1; } int32_t tDeserializeSGetUserAuthRsp(void *buf, int32_t bufLen, SGetUserAuthRsp *pRsp) { @@ -2846,7 +2876,6 @@ int32_t tSerializeSDbHbRspImp(SEncoder *pEncoder, const SDbHbRsp *pRsp) { return 0; } - int32_t tSerializeSDbHbBatchRsp(void *buf, int32_t bufLen, SDbHbBatchRsp *pRsp) { SEncoder encoder = {0}; tEncoderInit(&encoder, buf, bufLen); @@ -2910,7 +2939,7 @@ int32_t tDeserializeSUseDbRsp(void *buf, int32_t bufLen, SUseDbRsp *pRsp) { return 0; } -int32_t tDeserializeSDbHbRspImp(SDecoder* decoder, SDbHbRsp* pRsp) { +int32_t tDeserializeSDbHbRspImp(SDecoder *decoder, SDbHbRsp *pRsp) { int8_t flag = 0; if (tDecodeI8(decoder, &flag) < 0) return -1; if (flag) { @@ -3198,7 +3227,7 @@ int32_t tSerializeSDbCfgRsp(void *buf, int32_t bufLen, const SDbCfgRsp *pRsp) { return tlen; } -int32_t tDeserializeSDbCfgRspImpl(SDecoder* decoder, SDbCfgRsp *pRsp) { +int32_t tDeserializeSDbCfgRspImpl(SDecoder *decoder, SDbCfgRsp *pRsp) { if (tDecodeCStrTo(decoder, pRsp->db) < 0) return -1; if (tDecodeI64(decoder, &pRsp->dbId) < 0) return -1; if (tDecodeI32(decoder, &pRsp->cfgVersion) < 0) return -1; @@ -3484,12 +3513,14 @@ int32_t tDeserializeSShowVariablesReq(void *buf, int32_t bufLen, SShowVariablesR int32_t 
tEncodeSVariablesInfo(SEncoder *pEncoder, SVariablesInfo *pInfo) { if (tEncodeCStr(pEncoder, pInfo->name) < 0) return -1; if (tEncodeCStr(pEncoder, pInfo->value) < 0) return -1; + if (tEncodeCStr(pEncoder, pInfo->scope) < 0) return -1; return 0; } int32_t tDecodeSVariablesInfo(SDecoder *pDecoder, SVariablesInfo *pInfo) { if (tDecodeCStrTo(pDecoder, pInfo->name) < 0) return -1; if (tDecodeCStrTo(pDecoder, pInfo->value) < 0) return -1; + if (tDecodeCStrTo(pDecoder, pInfo->scope) < 0) return -1; return 0; } @@ -5318,10 +5349,10 @@ int32_t tDeserializeSMqAskEpReq(void *buf, int32_t bufLen, SMqAskEpReq *pReq) { return 0; } -int32_t tDeatroySMqHbReq(SMqHbReq* pReq){ - for(int i = 0; i < taosArrayGetSize(pReq->topics); i++){ - TopicOffsetRows* vgs = taosArrayGet(pReq->topics, i); - if(vgs) taosArrayDestroy(vgs->offsetRows); +int32_t tDeatroySMqHbReq(SMqHbReq *pReq) { + for (int i = 0; i < taosArrayGetSize(pReq->topics); i++) { + TopicOffsetRows *vgs = taosArrayGet(pReq->topics, i); + if (vgs) taosArrayDestroy(vgs->offsetRows); } taosArrayDestroy(pReq->topics); return 0; @@ -5338,7 +5369,7 @@ int32_t tSerializeSMqHbReq(void *buf, int32_t bufLen, SMqHbReq *pReq) { int32_t sz = taosArrayGetSize(pReq->topics); if (tEncodeI32(&encoder, sz) < 0) return -1; for (int32_t i = 0; i < sz; ++i) { - TopicOffsetRows* vgs = (TopicOffsetRows*)taosArrayGet(pReq->topics, i); + TopicOffsetRows *vgs = (TopicOffsetRows *)taosArrayGet(pReq->topics, i); if (tEncodeCStr(&encoder, vgs->topicName) < 0) return -1; int32_t szVgs = taosArrayGetSize(vgs->offsetRows); if (tEncodeI32(&encoder, szVgs) < 0) return -1; @@ -5368,19 +5399,19 @@ int32_t tDeserializeSMqHbReq(void *buf, int32_t bufLen, SMqHbReq *pReq) { if (tDecodeI32(&decoder, &pReq->epoch) < 0) return -1; int32_t sz = 0; if (tDecodeI32(&decoder, &sz) < 0) return -1; - if(sz > 0){ + if (sz > 0) { pReq->topics = taosArrayInit(sz, sizeof(TopicOffsetRows)); if (NULL == pReq->topics) return -1; for (int32_t i = 0; i < sz; ++i) { - TopicOffsetRows* 
data = taosArrayReserve(pReq->topics, 1); + TopicOffsetRows *data = taosArrayReserve(pReq->topics, 1); tDecodeCStrTo(&decoder, data->topicName); int32_t szVgs = 0; if (tDecodeI32(&decoder, &szVgs) < 0) return -1; - if(szVgs > 0){ + if (szVgs > 0) { data->offsetRows = taosArrayInit(szVgs, sizeof(OffsetRows)); if (NULL == data->offsetRows) return -1; - for (int32_t j= 0; j < szVgs; ++j) { - OffsetRows* offRows = taosArrayReserve(data->offsetRows, 1); + for (int32_t j = 0; j < szVgs; ++j) { + OffsetRows *offRows = taosArrayReserve(data->offsetRows, 1); if (tDecodeI32(&decoder, &offRows->vgId) < 0) return -1; if (tDecodeI64(&decoder, &offRows->rows) < 0) return -1; if (tDecodeSTqOffsetVal(&decoder, &offRows->offset) < 0) return -1; @@ -5394,6 +5425,47 @@ int32_t tDeserializeSMqHbReq(void *buf, int32_t bufLen, SMqHbReq *pReq) { return 0; } +int32_t tSerializeSMqSeekReq(void *buf, int32_t bufLen, SMqSeekReq *pReq) { + int32_t headLen = sizeof(SMsgHead); + if (buf != NULL) { + buf = (char *)buf + headLen; + bufLen -= headLen; + } + SEncoder encoder = {0}; + tEncoderInit(&encoder, buf, bufLen); + if (tStartEncode(&encoder) < 0) return -1; + if (tEncodeI64(&encoder, pReq->consumerId) < 0) return -1; + if (tEncodeCStr(&encoder, pReq->subKey) < 0) return -1; + tEndEncode(&encoder); + + int32_t tlen = encoder.pos; + tEncoderClear(&encoder); + + if (buf != NULL) { + SMsgHead *pHead = (SMsgHead *)((char *)buf - headLen); + pHead->vgId = htonl(pReq->head.vgId); + pHead->contLen = htonl(tlen + headLen); + } + + return tlen + headLen; +} + +int32_t tDeserializeSMqSeekReq(void *buf, int32_t bufLen, SMqSeekReq *pReq) { + int32_t headLen = sizeof(SMsgHead); + + SDecoder decoder = {0}; + tDecoderInit(&decoder, (char *)buf + headLen, bufLen - headLen); + + if (tStartDecode(&decoder) < 0) return -1; + if (tDecodeI64(&decoder, &pReq->consumerId) < 0) return -1; + tDecodeCStrTo(&decoder, pReq->subKey); + + tEndDecode(&decoder); + + tDecoderClear(&decoder); + return 0; +} + int32_t 
tSerializeSSubQueryMsg(void *buf, int32_t bufLen, SSubQueryMsg *pReq) { int32_t headLen = sizeof(SMsgHead); if (buf != NULL) { @@ -5580,9 +5652,9 @@ int32_t tSerializeSMqPollReq(void *buf, int32_t bufLen, SMqPollReq *pReq) { int32_t tDeserializeSMqPollReq(void *buf, int32_t bufLen, SMqPollReq *pReq) { int32_t headLen = sizeof(SMsgHead); -// SMsgHead *pHead = buf; -// pHead->vgId = pReq->head.vgId; -// pHead->contLen = pReq->head.contLen; + // SMsgHead *pHead = buf; + // pHead->vgId = pReq->head.vgId; + // pHead->contLen = pReq->head.contLen; SDecoder decoder = {0}; tDecoderInit(&decoder, (char *)buf + headLen, bufLen - headLen); @@ -6953,7 +7025,7 @@ int32_t tDecodeSVAlterTbReq(SDecoder *pDecoder, SVAlterTbReq *pReq) { return 0; } -int32_t tDecodeSVAlterTbReqSetCtime(SDecoder* pDecoder, SVAlterTbReq* pReq, int64_t ctimeMs) { +int32_t tDecodeSVAlterTbReqSetCtime(SDecoder *pDecoder, SVAlterTbReq *pReq, int64_t ctimeMs) { if (tStartDecode(pDecoder) < 0) return -1; if (tDecodeSVAlterTbReqCommon(pDecoder, pReq) < 0) return -1; @@ -7165,10 +7237,8 @@ bool tOffsetEqual(const STqOffsetVal *pLeft, const STqOffsetVal *pRight) { } else if (pLeft->type == TMQ_OFFSET__SNAPSHOT_META) { return pLeft->uid == pRight->uid; } else { + uError("offset type:%d", pLeft->type); ASSERT(0); - /*ASSERT(pLeft->type == TMQ_OFFSET__RESET_NONE || pLeft->type == TMQ_OFFSET__RESET_EARLIEST ||*/ - /*pLeft->type == TMQ_OFFSET__RESET_LATEST);*/ - /*return true;*/ } } return false; @@ -7186,13 +7256,13 @@ int32_t tDecodeSTqOffset(SDecoder *pDecoder, STqOffset *pOffset) { return 0; } -int32_t tEncodeMqVgOffset(SEncoder* pEncoder, const SMqVgOffset* pOffset) { +int32_t tEncodeMqVgOffset(SEncoder *pEncoder, const SMqVgOffset *pOffset) { if (tEncodeSTqOffset(pEncoder, &pOffset->offset) < 0) return -1; if (tEncodeI64(pEncoder, pOffset->consumerId) < 0) return -1; return 0; } -int32_t tDecodeMqVgOffset(SDecoder* pDecoder, SMqVgOffset* pOffset) { +int32_t tDecodeMqVgOffset(SDecoder *pDecoder, SMqVgOffset 
*pOffset) { if (tDecodeSTqOffset(pDecoder, &pOffset->offset) < 0) return -1; if (tDecodeI64(pDecoder, &pOffset->consumerId) < 0) return -1; return 0; @@ -7363,27 +7433,8 @@ void tDeleteMqDataRsp(SMqDataRsp *pRsp) { } int32_t tEncodeSTaosxRsp(SEncoder *pEncoder, const STaosxRsp *pRsp) { - if (tEncodeSTqOffsetVal(pEncoder, &pRsp->reqOffset) < 0) return -1; - if (tEncodeSTqOffsetVal(pEncoder, &pRsp->rspOffset) < 0) return -1; - if (tEncodeI32(pEncoder, pRsp->blockNum) < 0) return -1; - if (pRsp->blockNum != 0) { - if (tEncodeI8(pEncoder, pRsp->withTbName) < 0) return -1; - if (tEncodeI8(pEncoder, pRsp->withSchema) < 0) return -1; + if (tEncodeMqDataRsp(pEncoder, (const SMqDataRsp *)pRsp) < 0) return -1; - for (int32_t i = 0; i < pRsp->blockNum; i++) { - int32_t bLen = *(int32_t *)taosArrayGet(pRsp->blockDataLen, i); - void *data = taosArrayGetP(pRsp->blockData, i); - if (tEncodeBinary(pEncoder, (const uint8_t *)data, bLen) < 0) return -1; - if (pRsp->withSchema) { - SSchemaWrapper *pSW = (SSchemaWrapper *)taosArrayGetP(pRsp->blockSchema, i); - if (tEncodeSSchemaWrapper(pEncoder, pSW) < 0) return -1; - } - if (pRsp->withTbName) { - char *tbName = (char *)taosArrayGetP(pRsp->blockTbName, i); - if (tEncodeCStr(pEncoder, tbName) < 0) return -1; - } - } - } if (tEncodeI32(pEncoder, pRsp->createTableNum) < 0) return -1; if (pRsp->createTableNum) { for (int32_t i = 0; i < pRsp->createTableNum; i++) { @@ -7396,46 +7447,8 @@ int32_t tEncodeSTaosxRsp(SEncoder *pEncoder, const STaosxRsp *pRsp) { } int32_t tDecodeSTaosxRsp(SDecoder *pDecoder, STaosxRsp *pRsp) { - if (tDecodeSTqOffsetVal(pDecoder, &pRsp->reqOffset) < 0) return -1; - if (tDecodeSTqOffsetVal(pDecoder, &pRsp->rspOffset) < 0) return -1; - if (tDecodeI32(pDecoder, &pRsp->blockNum) < 0) return -1; - if (pRsp->blockNum != 0) { - pRsp->blockData = taosArrayInit(pRsp->blockNum, sizeof(void *)); - pRsp->blockDataLen = taosArrayInit(pRsp->blockNum, sizeof(int32_t)); - if (tDecodeI8(pDecoder, &pRsp->withTbName) < 0) return 
-1; - if (tDecodeI8(pDecoder, &pRsp->withSchema) < 0) return -1; - if (pRsp->withTbName) { - pRsp->blockTbName = taosArrayInit(pRsp->blockNum, sizeof(void *)); - } - if (pRsp->withSchema) { - pRsp->blockSchema = taosArrayInit(pRsp->blockNum, sizeof(void *)); - } + if (tDecodeMqDataRsp(pDecoder, (SMqDataRsp *)pRsp) < 0) return -1; - for (int32_t i = 0; i < pRsp->blockNum; i++) { - void *data; - uint64_t bLen; - if (tDecodeBinaryAlloc(pDecoder, &data, &bLen) < 0) return -1; - taosArrayPush(pRsp->blockData, &data); - int32_t len = bLen; - taosArrayPush(pRsp->blockDataLen, &len); - - if (pRsp->withSchema) { - SSchemaWrapper *pSW = (SSchemaWrapper *)taosMemoryCalloc(1, sizeof(SSchemaWrapper)); - if (pSW == NULL) return -1; - if (tDecodeSSchemaWrapper(pDecoder, pSW) < 0) { - taosMemoryFree(pSW); - return -1; - } - taosArrayPush(pRsp->blockSchema, &pSW); - } - - if (pRsp->withTbName) { - char *tbName; - if (tDecodeCStrAlloc(pDecoder, &tbName) < 0) return -1; - taosArrayPush(pRsp->blockTbName, &tbName); - } - } - } if (tDecodeI32(pDecoder, &pRsp->createTableNum) < 0) return -1; if (pRsp->createTableNum) { pRsp->createTableLen = taosArrayInit(pRsp->createTableNum, sizeof(int32_t)); diff --git a/source/dnode/mgmt/exe/dmMain.c b/source/dnode/mgmt/exe/dmMain.c index e1b8a5768479d0d897b33abca9c1c64e9f68e507..3c08714218dfa861958e27080a23a407e7323eb5 100644 --- a/source/dnode/mgmt/exe/dmMain.c +++ b/source/dnode/mgmt/exe/dmMain.c @@ -373,6 +373,8 @@ int mainWindows(int argc, char **argv) { dInfo("start to init service"); dmSetSignalHandle(); + tsDndStart = taosGetTimestampMs(); + tsDndStartOsUptime = taosGetOsUptime(); int32_t code = dmRun(); dInfo("shutting down the service"); diff --git a/source/dnode/mgmt/mgmt_dnode/src/dmHandle.c b/source/dnode/mgmt/mgmt_dnode/src/dmHandle.c index 19982698968c8c91bc3f53edd3868f173b070005..1bce20ff44627c814c9ce677408e7ae3e12ed3ba 100644 --- a/source/dnode/mgmt/mgmt_dnode/src/dmHandle.c +++ b/source/dnode/mgmt/mgmt_dnode/src/dmHandle.c @@ -90,6 
+90,7 @@ void dmSendStatusReq(SDnodeMgmt *pMgmt) { req.clusterCfg.statusInterval = tsStatusInterval; req.clusterCfg.checkTime = 0; + req.clusterCfg.ttlChangeOnWrite = tsTtlChangeOnWrite; char timestr[32] = "1970-01-01 00:00:00.00"; (void)taosParseTime(timestr, &req.clusterCfg.checkTime, (int32_t)strlen(timestr), TSDB_TIME_PRECISION_MILLI, 0); memcpy(req.clusterCfg.timezone, tsTimezoneStr, TD_TIMEZONE_LEN); @@ -265,6 +266,12 @@ int32_t dmAppendVariablesToBlock(SSDataBlock *pBlock, int32_t dnodeId) { pColInfo = taosArrayGet(pBlock->pDataBlock, c++); colDataSetVal(pColInfo, i, value, false); + char scope[TSDB_CONFIG_SCOPE_LEN + VARSTR_HEADER_SIZE] = {0}; + cfgDumpItemScope(pItem, &scope[VARSTR_HEADER_SIZE], TSDB_CONFIG_SCOPE_LEN, &valueLen); + varDataSetLen(scope, valueLen); + pColInfo = taosArrayGet(pBlock->pDataBlock, c++); + colDataSetVal(pColInfo, i, scope, false); + numOfRows++; } diff --git a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c index 89c394fdd0889a30b737b43f534aec96e3fb3afa..76cb65b53a850b5b597c17806b501c88174380be 100644 --- a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c +++ b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c @@ -24,12 +24,16 @@ static void *dmStatusThreadFp(void *param) { const static int16_t TRIM_FREQ = 30; int32_t trimCount = 0; + int32_t upTimeCount = 0; + int64_t upTime = 0; + while (1) { taosMsleep(200); if (pMgmt->pData->dropped || pMgmt->pData->stopped) break; int64_t curTime = taosGetTimestampMs(); - float interval = (curTime - lastTime) / 1000.0f; + if (curTime < lastTime) lastTime = curTime; + float interval = (curTime - lastTime) / 1000.0f; if (interval >= tsStatusInterval) { dmSendStatusReq(pMgmt); lastTime = curTime; @@ -38,6 +42,11 @@ static void *dmStatusThreadFp(void *param) { if (trimCount == 0) { taosMemoryTrim(0); } + + if ((upTimeCount = ((upTimeCount + 1) & 63)) == 0) { + upTime = taosGetOsUptime() - tsDndStartOsUptime; + tsDndUpTime = TMAX(tsDndUpTime, upTime); + } } } @@ 
-54,7 +63,8 @@ static void *dmMonitorThreadFp(void *param) { if (pMgmt->pData->dropped || pMgmt->pData->stopped) break; int64_t curTime = taosGetTimestampMs(); - float interval = (curTime - lastTime) / 1000.0f; + if (curTime < lastTime) lastTime = curTime; + float interval = (curTime - lastTime) / 1000.0f; if (interval >= tsMonitorInterval) { (*pMgmt->sendMonitorReportFp)(); lastTime = curTime; diff --git a/source/dnode/mgmt/mgmt_snode/src/smHandle.c b/source/dnode/mgmt/mgmt_snode/src/smHandle.c index b2fb7243ff28a116dbe6bf8b97a57e16aee5207b..8206b4e4258a0eacb7eb91b5cde66a4bcb53f3d2 100644 --- a/source/dnode/mgmt/mgmt_snode/src/smHandle.c +++ b/source/dnode/mgmt/mgmt_snode/src/smHandle.c @@ -79,6 +79,7 @@ SArray *smGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_SCAN_HISTORY_FINISH, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_SCAN_HISTORY_FINISH_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; code = 0; _OVER: diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index adceeb9ec3acc66eaf7bf5071a62fae1366cdcfb..4fd8f3950a1d9e143160789bae070da94b79069e 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -770,12 +770,13 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_TMQ_SUBSCRIBE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_TMQ_DELETE_SUB, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_TMQ_COMMIT_OFFSET, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_VND_TMQ_SEEK_TO_OFFSET, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, 
TDMT_VND_TMQ_SEEK, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_TMQ_ADD_CHECKINFO, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_TMQ_DEL_CHECKINFO, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_TMQ_CONSUME, vmPutMsgToQueryQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_TMQ_CONSUME_PUSH, vmPutMsgToQueryQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_TMQ_VG_WALINFO, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_TMQ_VG_COMMITTEDINFO, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_DELETE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_BATCH_DEL, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_COMMIT, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; @@ -791,6 +792,7 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_SCAN_HISTORY_FINISH, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_SCAN_HISTORY_FINISH_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TRANSFER_STATE, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/mgmt/node_mgmt/src/dmEnv.c b/source/dnode/mgmt/node_mgmt/src/dmEnv.c index 848e123448a3733d2c675bcebed82b7b261088db..3f9c5bbeaf8f7481069e252efe19ac84eeb14e41 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmEnv.c +++ 
b/source/dnode/mgmt/node_mgmt/src/dmEnv.c @@ -42,7 +42,7 @@ static SDnode globalDnode = {0}; static const char *dmOS[10] = {"Ubuntu", "CentOS Linux", "Red Hat", "Debian GNU", "CoreOS", - "FreeBSD", "openSUSE", "SLES", "Fedora", "MacOS"}; + "FreeBSD", "openSUSE", "SLES", "Fedora", "macOS"}; SDnode *dmInstance() { return &globalDnode; } diff --git a/source/dnode/mgmt/node_mgmt/src/dmNodes.c b/source/dnode/mgmt/node_mgmt/src/dmNodes.c index 19d5e06c5b6d118feaa8bf6d50fe222e7a73007f..a8bf5be3e21136ddb290adbd6215e35e5a7f2d3b 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmNodes.c +++ b/source/dnode/mgmt/node_mgmt/src/dmNodes.c @@ -41,7 +41,7 @@ int32_t dmOpenNode(SMgmtWrapper *pWrapper) { pWrapper->pMgmt = output.pMgmt; } - dmReportStartup(pWrapper->name, "openned"); + dmReportStartup(pWrapper->name, "opened"); return 0; } @@ -159,7 +159,7 @@ int32_t dmRunDnode(SDnode *pDnode) { } else { count++; } - + taosMsleep(100); } } diff --git a/source/dnode/mgmt/node_mgmt/src/dmTransport.c b/source/dnode/mgmt/node_mgmt/src/dmTransport.c index 5d6d16ccf84f73a0c45193cc9b15dda3bf59adcc..df54f8abbad86a9c23c96c0b533bbd8f432ae7a3 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmTransport.c +++ b/source/dnode/mgmt/node_mgmt/src/dmTransport.c @@ -290,6 +290,7 @@ int32_t dmInitClient(SDnode *pDnode) { rpcInit.cfp = (RpcCfp)dmProcessRpcMsg; rpcInit.sessions = 1024; rpcInit.connType = TAOS_CONN_CLIENT; + rpcInit.user = TSDB_DEFAULT_USER; rpcInit.idleTime = tsShellActivityTimer * 1000; rpcInit.parent = pDnode; rpcInit.rfp = rpcRfp; diff --git a/source/dnode/mnode/impl/inc/mndCluster.h b/source/dnode/mnode/impl/inc/mndCluster.h index 2cb41edd7c1d37c8dab6f0e276259e9cc530fea8..e33ffdb372d0c317f5478add89b587bbe91562a9 100644 --- a/source/dnode/mnode/impl/inc/mndCluster.h +++ b/source/dnode/mnode/impl/inc/mndCluster.h @@ -27,7 +27,7 @@ void mndCleanupCluster(SMnode *pMnode); int32_t mndGetClusterName(SMnode *pMnode, char *clusterName, int32_t len); int64_t mndGetClusterId(SMnode *pMnode); int64_t 
mndGetClusterCreateTime(SMnode *pMnode); -float mndGetClusterUpTime(SMnode *pMnode); +int64_t mndGetClusterUpTime(SMnode *pMnode); #ifdef __cplusplus } diff --git a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h index ffc099cc881248611441ae8895e554b1e6556b61..3c27fa90dba8eaba55765e411c1d3ef282883f2e 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -133,16 +133,16 @@ typedef enum { DND_REASON_TIME_ZONE_NOT_MATCH, DND_REASON_LOCALE_NOT_MATCH, DND_REASON_CHARSET_NOT_MATCH, + DND_REASON_TTL_CHANGE_ON_WRITE_NOT_MATCH, DND_REASON_OTHERS } EDndReason; typedef enum { - CONSUMER_UPDATE_REB_MODIFY_NOTOPIC = 1, // topic do not need modified after rebalance - CONSUMER_UPDATE_REB_MODIFY_TOPIC, // topic need modified after rebalance - CONSUMER_UPDATE_REB_MODIFY_REMOVE, // topic need removed after rebalance -// CONSUMER_UPDATE_TIMER_LOST, - CONSUMER_UPDATE_RECOVER, - CONSUMER_UPDATE_SUB_MODIFY, // modify after subscribe req + CONSUMER_UPDATE_REB = 1, // update after rebalance + CONSUMER_ADD_REB, // add after rebalance + CONSUMER_REMOVE_REB, // remove after rebalance + CONSUMER_UPDATE_REC, // update after recover + CONSUMER_UPDATE_SUB, // update after subscribe req } ECsmUpdateType; typedef struct { @@ -216,8 +216,9 @@ typedef struct { int64_t createdTime; int64_t updateTime; ESyncState syncState; + SyncTerm syncTerm; bool syncRestore; - int64_t stateStartTime; + int64_t roleTimeMs; SDnodeObj* pDnode; int32_t role; SyncIndex lastIndex; diff --git a/source/dnode/mnode/impl/inc/mndScheduler.h b/source/dnode/mnode/impl/inc/mndScheduler.h index 14517a99d39b6c4b1a18b7a3910d5c9d134da59c..cba52c6b45a14cc6c5fa434279b2c3f1ccecc4ff 100644 --- a/source/dnode/mnode/impl/inc/mndScheduler.h +++ b/source/dnode/mnode/impl/inc/mndScheduler.h @@ -22,11 +22,8 @@ extern "C" { #endif -int32_t mndInitScheduler(SMnode* pMnode); -void mndCleanupScheduler(SMnode* pMnode); int32_t mndSchedInitSubEp(SMnode* pMnode, const 
SMqTopicObj* pTopic, SMqSubscribeObj* pSub); - int32_t mndConvertRsmaTask(char** pDst, int32_t* pDstLen, const char* ast, int64_t uid, int8_t triggerType, int64_t watermark, int64_t deleteMark); diff --git a/source/dnode/mnode/impl/inc/mndStb.h b/source/dnode/mnode/impl/inc/mndStb.h index 99af413539b850eb4f62808fdf7899ab679933f0..db960d790f708e76ce2921f4ad6b6fefba82e441 100644 --- a/source/dnode/mnode/impl/inc/mndStb.h +++ b/source/dnode/mnode/impl/inc/mndStb.h @@ -39,6 +39,7 @@ int32_t mndBuildSMCreateStbRsp(SMnode *pMnode, char *dbFName, char *stbFName, vo void mndExtractDbNameFromStbFullName(const char *stbFullName, char *dst); void mndExtractShortDbNameFromStbFullName(const char *stbFullName, char *dst); +void mndExtractShortDbNameFromDbFullName(const char *stbFullName, char *dst); void mndExtractTbNameFromStbFullName(const char *stbFullName, char *dst, int32_t dstSize); const char *mndGetStbStr(const char *src); diff --git a/source/dnode/mnode/impl/inc/mndStream.h b/source/dnode/mnode/impl/inc/mndStream.h index f9f01c77ed942ee318d36c99487f9f7a65418543..05adc17d64fed3a87e48ac3ec4e4eaaff869a7f8 100644 --- a/source/dnode/mnode/impl/inc/mndStream.h +++ b/source/dnode/mnode/impl/inc/mndStream.h @@ -38,8 +38,6 @@ int32_t mndPersistStream(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); int32_t mndDropStreamTasks(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); int32_t mndPersistDropStreamLog(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); -int32_t mndDropStreamByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb); - int32_t mndGetNumOfStreams(SMnode *pMnode, char *dbName, int32_t *pNumOfStreams); #ifdef __cplusplus diff --git a/source/dnode/mnode/impl/src/mndCluster.c b/source/dnode/mnode/impl/src/mndCluster.c index 8ea98242f988d57928ad4767f66d1891f6da1335..4c799e1e1ea4b4ba54811cf3df5ef663afb80e7c 100644 --- a/source/dnode/mnode/impl/src/mndCluster.c +++ b/source/dnode/mnode/impl/src/mndCluster.c @@ -76,7 +76,6 @@ static SClusterObj 
*mndAcquireCluster(SMnode *pMnode, void **ppIter) { if (pIter == NULL) break; *ppIter = pIter; - return pCluster; } @@ -123,7 +122,7 @@ static int32_t mndGetClusterUpTimeImp(SClusterObj *pCluster) { #endif } -float mndGetClusterUpTime(SMnode *pMnode) { +int64_t mndGetClusterUpTime(SMnode *pMnode) { int64_t upTime = 0; void *pIter = NULL; SClusterObj *pCluster = mndAcquireCluster(pMnode, &pIter); @@ -132,7 +131,7 @@ float mndGetClusterUpTime(SMnode *pMnode) { mndReleaseCluster(pMnode, pCluster, pIter); } - return upTime / 86400.0f; + return upTime; } static SSdbRaw *mndClusterActionEncode(SClusterObj *pCluster) { diff --git a/source/dnode/mnode/impl/src/mndConsumer.c b/source/dnode/mnode/impl/src/mndConsumer.c index bdf9931ca25cc61c20580ccfe02cc8ae6eee87eb..82492f930ef29f54722a4dccc410dc828db6e70a 100644 --- a/source/dnode/mnode/impl/src/mndConsumer.c +++ b/source/dnode/mnode/impl/src/mndConsumer.c @@ -184,7 +184,7 @@ static int32_t mndProcessConsumerRecoverMsg(SRpcMsg *pMsg) { } SMqConsumerObj *pConsumerNew = tNewSMqConsumerObj(pConsumer->consumerId, pConsumer->cgroup); - pConsumerNew->updateType = CONSUMER_UPDATE_RECOVER; + pConsumerNew->updateType = CONSUMER_UPDATE_REC; mndReleaseConsumer(pMnode, pConsumer); @@ -281,7 +281,7 @@ static int32_t mndProcessMqTimerMsg(SRpcMsg *pMsg) { // rebalance cannot be parallel if (!mndRebTryStart()) { - mDebug("mq rebalance already in progress, do nothing"); + mInfo("mq rebalance already in progress, do nothing"); return 0; } @@ -312,7 +312,7 @@ static int32_t mndProcessMqTimerMsg(SRpcMsg *pMsg) { int32_t hbStatus = atomic_add_fetch_32(&pConsumer->hbStatus, 1); int32_t status = atomic_load_32(&pConsumer->status); - mDebug("check for consumer:0x%" PRIx64 " status:%d(%s), sub-time:%" PRId64 ", createTime:%" PRId64 ", hbstatus:%d", + mInfo("check for consumer:0x%" PRIx64 " status:%d(%s), sub-time:%" PRId64 ", createTime:%" PRId64 ", hbstatus:%d", pConsumer->consumerId, status, mndConsumerStatusName(status), 
pConsumer->subscribeTime, pConsumer->createTime, hbStatus); @@ -362,7 +362,7 @@ static int32_t mndProcessMqTimerMsg(SRpcMsg *pMsg) { } if (taosHashGetSize(pRebMsg->rebSubHash) != 0) { - mInfo("mq rebalance will be triggered"); + mInfo("mq rebalance will be triggered"); SRpcMsg rpcMsg = { .msgType = TDMT_MND_TMQ_DO_REBALANCE, .pCont = pRebMsg, @@ -416,10 +416,11 @@ static int32_t mndProcessMqHbReq(SRpcMsg *pMsg) { for(int i = 0; i < taosArrayGetSize(req.topics); i++){ TopicOffsetRows* data = taosArrayGet(req.topics, i); - mDebug("heartbeat report offset rows.%s:%s", pConsumer->cgroup, data->topicName); + mInfo("heartbeat report offset rows.%s:%s", pConsumer->cgroup, data->topicName); SMqSubscribeObj *pSub = mndAcquireSubscribe(pMnode, pConsumer->cgroup, data->topicName); if(pSub == NULL){ + ASSERT(0); continue; } taosWLockLatch(&pSub->lock); @@ -515,7 +516,10 @@ static int32_t mndProcessAskEpReq(SRpcMsg *pMsg) { char *topic = taosArrayGetP(pConsumer->currentTopics, i); SMqSubscribeObj *pSub = mndAcquireSubscribe(pMnode, pConsumer->cgroup, topic); // txn guarantees pSub is created - + if(pSub == NULL) { + ASSERT(0); + continue; + } taosRLockLatch(&pSub->lock); SMqSubTopicEp topicEp = {0}; @@ -523,6 +527,12 @@ static int32_t mndProcessAskEpReq(SRpcMsg *pMsg) { // 2.1 fetch topic schema SMqTopicObj *pTopic = mndAcquireTopic(pMnode, topic); + if(pTopic == NULL) { + ASSERT(0); + taosRUnLockLatch(&pSub->lock); + mndReleaseSubscribe(pMnode, pSub); + continue; + } taosRLockLatch(&pTopic->lock); tstrncpy(topicEp.db, pTopic->db, TSDB_DB_FNAME_LEN); topicEp.schema.nCols = pTopic->schema.nCols; @@ -701,7 +711,7 @@ int32_t mndProcessSubscribeReq(SRpcMsg *pMsg) { pConsumerNew->autoCommitInterval = subscribe.autoCommitInterval; pConsumerNew->resetOffsetCfg = subscribe.resetOffsetCfg; -// pConsumerNew->updateType = CONSUMER_UPDATE_SUB_MODIFY; // use insert logic +// pConsumerNew->updateType = CONSUMER_UPDATE_SUB; // use insert logic taosArrayDestroy(pConsumerNew->assignedTopics); 
pConsumerNew->assignedTopics = taosArrayDup(pTopicList, topicNameDup); @@ -731,7 +741,7 @@ int32_t mndProcessSubscribeReq(SRpcMsg *pMsg) { } // set the update type - pConsumerNew->updateType = CONSUMER_UPDATE_SUB_MODIFY; + pConsumerNew->updateType = CONSUMER_UPDATE_SUB; taosArrayDestroy(pConsumerNew->assignedTopics); pConsumerNew->assignedTopics = taosArrayDup(pTopicList, topicNameDup); @@ -893,7 +903,7 @@ static int32_t mndConsumerActionInsert(SSdb *pSdb, SMqConsumerObj *pConsumer) { mInfo("consumer:0x%" PRIx64 " sub insert, cgroup:%s status:%d(%s) epoch:%d", pConsumer->consumerId, pConsumer->cgroup, pConsumer->status, mndConsumerStatusName(pConsumer->status), pConsumer->epoch); - pConsumer->subscribeTime = taosGetTimestampMs(); + pConsumer->subscribeTime = pConsumer->createTime; return 0; } @@ -984,7 +994,7 @@ static int32_t mndConsumerActionUpdate(SSdb *pSdb, SMqConsumerObj *pOldConsumer, taosWLockLatch(&pOldConsumer->lock); - if (pNewConsumer->updateType == CONSUMER_UPDATE_SUB_MODIFY) { + if (pNewConsumer->updateType == CONSUMER_UPDATE_SUB) { TSWAP(pOldConsumer->rebNewTopics, pNewConsumer->rebNewTopics); TSWAP(pOldConsumer->rebRemovedTopics, pNewConsumer->rebRemovedTopics); TSWAP(pOldConsumer->assignedTopics, pNewConsumer->assignedTopics); @@ -1004,7 +1014,7 @@ static int32_t mndConsumerActionUpdate(SSdb *pSdb, SMqConsumerObj *pOldConsumer, // mInfo("consumer:0x%" PRIx64 " timer update, timer lost. 
state %s -> %s, reb-time:%" PRId64 ", reb-removed-topics:%d", // pOldConsumer->consumerId, mndConsumerStatusName(prevStatus), mndConsumerStatusName(pOldConsumer->status), // pOldConsumer->rebalanceTime, (int)taosArrayGetSize(pOldConsumer->rebRemovedTopics)); - } else if (pNewConsumer->updateType == CONSUMER_UPDATE_RECOVER) { + } else if (pNewConsumer->updateType == CONSUMER_UPDATE_REC) { int32_t sz = taosArrayGetSize(pOldConsumer->assignedTopics); for (int32_t i = 0; i < sz; i++) { char *topic = taosStrdup(taosArrayGetP(pOldConsumer->assignedTopics, i)); @@ -1013,12 +1023,12 @@ static int32_t mndConsumerActionUpdate(SSdb *pSdb, SMqConsumerObj *pOldConsumer, pOldConsumer->status = MQ_CONSUMER_STATUS_REBALANCE; mInfo("consumer:0x%" PRIx64 " timer update, timer recover",pOldConsumer->consumerId); - } else if (pNewConsumer->updateType == CONSUMER_UPDATE_REB_MODIFY_NOTOPIC) { + } else if (pNewConsumer->updateType == CONSUMER_UPDATE_REB) { atomic_add_fetch_32(&pOldConsumer->epoch, 1); pOldConsumer->rebalanceTime = taosGetTimestampMs(); mInfo("consumer:0x%" PRIx64 " reb update, only rebalance time", pOldConsumer->consumerId); - } else if (pNewConsumer->updateType == CONSUMER_UPDATE_REB_MODIFY_TOPIC) { + } else if (pNewConsumer->updateType == CONSUMER_ADD_REB) { char *pNewTopic = taosStrdup(taosArrayGetP(pNewConsumer->rebNewTopics, 0)); // check if exist in current topic @@ -1049,7 +1059,7 @@ static int32_t mndConsumerActionUpdate(SSdb *pSdb, SMqConsumerObj *pOldConsumer, (int)taosArrayGetSize(pOldConsumer->currentTopics), (int)taosArrayGetSize(pOldConsumer->rebNewTopics), (int)taosArrayGetSize(pOldConsumer->rebRemovedTopics)); - } else if (pNewConsumer->updateType == CONSUMER_UPDATE_REB_MODIFY_REMOVE) { + } else if (pNewConsumer->updateType == CONSUMER_REMOVE_REB) { char *removedTopic = taosArrayGetP(pNewConsumer->rebRemovedTopics, 0); // remove from removed topic @@ -1104,13 +1114,13 @@ static int32_t mndRetrieveConsumer(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock * } if 
(taosArrayGetSize(pConsumer->assignedTopics) == 0) { - mDebug("showing consumer:0x%" PRIx64 " no assigned topic, skip", pConsumer->consumerId); + mInfo("showing consumer:0x%" PRIx64 " no assigned topic, skip", pConsumer->consumerId); sdbRelease(pSdb, pConsumer); continue; } taosRLockLatch(&pConsumer->lock); - mDebug("showing consumer:0x%" PRIx64, pConsumer->consumerId); + mInfo("showing consumer:0x%" PRIx64, pConsumer->consumerId); int32_t topicSz = taosArrayGetSize(pConsumer->assignedTopics); bool hasTopic = true; diff --git a/source/dnode/mnode/impl/src/mndDb.c b/source/dnode/mnode/impl/src/mndDb.c index 67892ec57cf787ca34383391561686d116a931d7..e7a497de3606b278849bc9bb35dc74bffbc80cb2 100644 --- a/source/dnode/mnode/impl/src/mndDb.c +++ b/source/dnode/mnode/impl/src/mndDb.c @@ -1303,11 +1303,10 @@ static void mndBuildDBVgroupInfo(SDbObj *pDb, SMnode *pMnode, SArray *pVgList) { sdbRelease(pSdb, pVgroup); if (pDb && (vindex >= pDb->cfg.numOfVgroups)) { + sdbCancelFetch(pSdb, pIter); break; } } - - sdbCancelFetch(pSdb, pIter); } int32_t mndExtractDbInfo(SMnode *pMnode, SDbObj *pDb, SUseDbRsp *pRsp, const SUseDbReq *pReq) { diff --git a/source/dnode/mnode/impl/src/mndDnode.c b/source/dnode/mnode/impl/src/mndDnode.c index e899e6af82b6c33452a2a6c75fc5af2d74f56fd4..64a396794e9fbd0afb6abbe65924a806956c5cf9 100644 --- a/source/dnode/mnode/impl/src/mndDnode.c +++ b/source/dnode/mnode/impl/src/mndDnode.c @@ -41,6 +41,7 @@ static const char *offlineReason[] = { "timezone not match", "locale not match", "charset not match", + "ttl change on write not match" "unknown", }; @@ -414,6 +415,12 @@ static int32_t mndCheckClusterCfgPara(SMnode *pMnode, SDnodeObj *pDnode, const S return DND_REASON_CHARSET_NOT_MATCH; } + if (pCfg->ttlChangeOnWrite != tsTtlChangeOnWrite) { + mError("dnode:%d, ttlChangeOnWrite:%d inconsistent with cluster:%d", pDnode->id, pCfg->ttlChangeOnWrite, + tsTtlChangeOnWrite); + return DND_REASON_TTL_CHANGE_ON_WRITE_NOT_MATCH; + } + return 0; } @@ -525,13 
+532,23 @@ static int32_t mndProcessStatusReq(SRpcMsg *pReq) { SMnodeObj *pObj = mndAcquireMnode(pMnode, pDnode->id); if (pObj != NULL) { - if (pObj->syncState != statusReq.mload.syncState || pObj->syncRestore != statusReq.mload.syncRestore) { - mInfo("dnode:%d, mnode syncState from %s to %s, restoreState from %d to %d", pObj->id, syncStr(pObj->syncState), - syncStr(statusReq.mload.syncState), pObj->syncRestore, statusReq.mload.syncRestore); + bool roleChanged = pObj->syncState != statusReq.mload.syncState || + (statusReq.mload.syncTerm != -1 && pObj->syncTerm != statusReq.mload.syncTerm); + bool restoreChanged = pObj->syncRestore != statusReq.mload.syncRestore; + if (roleChanged || restoreChanged) { + mInfo("dnode:%d, mnode syncState from %s to %s, restoreState from %d to %d, syncTerm from %" PRId64 + " to %" PRId64, + pObj->id, syncStr(pObj->syncState), syncStr(statusReq.mload.syncState), pObj->syncRestore, + statusReq.mload.syncRestore, pObj->syncTerm, statusReq.mload.syncTerm); pObj->syncState = statusReq.mload.syncState; pObj->syncRestore = statusReq.mload.syncRestore; - pObj->stateStartTime = taosGetTimestampMs(); + pObj->syncTerm = statusReq.mload.syncTerm; + } + + if (roleChanged) { + pObj->roleTimeMs = (statusReq.mload.roleTimeMs != 0) ? 
statusReq.mload.roleTimeMs : taosGetTimestampMs(); } + mndReleaseMnode(pMnode, pObj); } @@ -656,6 +673,7 @@ static int32_t mndConfigDnode(SMnode *pMnode, SRpcMsg *pReq, SMCfgDnodeReq *pCfg STrans *pTrans = NULL; SDnodeObj *pDnode = NULL; bool cfgAll = pCfgReq->dnodeId == -1; + int32_t iter = 0; SSdb *pSdb = pMnode->pSdb; void *pIter = NULL; @@ -663,7 +681,8 @@ static int32_t mndConfigDnode(SMnode *pMnode, SRpcMsg *pReq, SMCfgDnodeReq *pCfg if (cfgAll) { pIter = sdbFetch(pSdb, SDB_DNODE, pIter, (void **)&pDnode); if (pIter == NULL) break; - } else if(!(pDnode = mndAcquireDnode(pMnode, pCfgReq->dnodeId))) { + ++iter; + } else if (!(pDnode = mndAcquireDnode(pMnode, pCfgReq->dnodeId))) { goto _OVER; } @@ -700,7 +719,7 @@ static int32_t mndConfigDnode(SMnode *pMnode, SRpcMsg *pReq, SMCfgDnodeReq *pCfg } if (pTrans && mndTransPrepare(pMnode, pTrans) != 0) goto _OVER; - + tsGrantHBInterval = TMIN(TMAX(5, iter / 2), 30); terrno = 0; _OVER: @@ -709,6 +728,7 @@ _OVER: } else { mndReleaseDnode(pMnode, pDnode); } + sdbCancelFetch(pSdb, pIter); mndTransDrop(pTrans); sdbFreeRaw(pRaw); return terrno; @@ -786,18 +806,22 @@ static int32_t mndProcessShowVariablesReq(SRpcMsg *pReq) { strcpy(info.name, "statusInterval"); snprintf(info.value, TSDB_CONFIG_VALUE_LEN, "%d", tsStatusInterval); + strcpy(info.scope, "server"); taosArrayPush(rsp.variables, &info); strcpy(info.name, "timezone"); snprintf(info.value, TSDB_CONFIG_VALUE_LEN, "%s", tsTimezoneStr); + strcpy(info.scope, "both"); taosArrayPush(rsp.variables, &info); strcpy(info.name, "locale"); snprintf(info.value, TSDB_CONFIG_VALUE_LEN, "%s", tsLocale); + strcpy(info.scope, "both"); taosArrayPush(rsp.variables, &info); strcpy(info.name, "charset"); snprintf(info.value, TSDB_CONFIG_VALUE_LEN, "%s", tsCharset); + strcpy(info.scope, "both"); taosArrayPush(rsp.variables, &info); int32_t rspLen = tSerializeSShowVariablesRsp(NULL, 0, &rsp); @@ -860,7 +884,7 @@ static int32_t mndProcessCreateDnodeReq(SRpcMsg *pReq) { code = 
mndCreateDnode(pMnode, pReq, &createReq); if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS; - + tsGrantHBInterval = 5; _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { mError("dnode:%s:%d, failed to create since %s", createReq.fqdn, createReq.port, terrstr()); diff --git a/source/dnode/mnode/impl/src/mndIndex.c b/source/dnode/mnode/impl/src/mndIndex.c index b4de51204fa7a6af147b77eecb9e574c222e324a..efaff7ffc4c95dac9e4d37b59fe004072426cd75 100644 --- a/source/dnode/mnode/impl/src/mndIndex.c +++ b/source/dnode/mnode/impl/src/mndIndex.c @@ -831,6 +831,7 @@ int32_t mndGetIdxsByTagName(SMnode *pMnode, SStbObj *pStb, char *tagName, SIdxOb if (pIdx->stbUid == pStb->uid && strcasecmp(pIdx->colName, tagName) == 0) { memcpy((char *)idx, (char *)pIdx, sizeof(SIdxObj)); sdbRelease(pSdb, pIdx); + sdbCancelFetch(pSdb, pIter); return 0; } @@ -851,7 +852,7 @@ int32_t mndDropIdxsByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb) { if (pIdx->dbUid == pDb->uid) { if (mndSetDropIdxCommitLogs(pMnode, pTrans, pIdx) != 0) { sdbRelease(pSdb, pIdx); - sdbCancelFetch(pSdb, pIdx); + sdbCancelFetch(pSdb, pIter); return -1; } } diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index 381b1e64ed97080a38b3e45e53fe74c18ea3dc15..1071a6cf6ebbe34cf6cd1873f1180ede6c113219 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -804,7 +804,7 @@ int32_t mndGetMonitorInfo(SMnode *pMnode, SMonClusterInfo *pClusterInfo, SMonVgr if (pObj->id == pMnode->selfDnodeId) { pClusterInfo->first_ep_dnode_id = pObj->id; tstrncpy(pClusterInfo->first_ep, pObj->pDnode->ep, sizeof(pClusterInfo->first_ep)); - pClusterInfo->master_uptime = mndGetClusterUpTime(pMnode); + pClusterInfo->master_uptime = (float)mndGetClusterUpTime(pMnode) / 86400.0f; // pClusterInfo->master_uptime = (ms - pObj->stateStartTime) / (86400000.0f); tstrncpy(desc.role, syncStr(TAOS_SYNC_STATE_LEADER), sizeof(desc.role)); } else { @@ -890,7 +890,10 @@ 
int32_t mndGetLoad(SMnode *pMnode, SMnodeLoad *pLoad) { SSyncState state = syncGetState(pMnode->syncMgmt.sync); pLoad->syncState = state.state; pLoad->syncRestore = state.restored; - mTrace("mnode current syncState is %s, syncRestore:%d", syncStr(pLoad->syncState), pLoad->syncRestore); + pLoad->syncTerm = state.term; + pLoad->roleTimeMs = state.roleTimeMs; + mTrace("mnode current syncState is %s, syncRestore:%d, syncTerm:%" PRId64 " ,roleTimeMs:%" PRId64, + syncStr(pLoad->syncState), pLoad->syncRestore, pLoad->syncTerm, pLoad->roleTimeMs); return 0; } diff --git a/source/dnode/mnode/impl/src/mndMnode.c b/source/dnode/mnode/impl/src/mndMnode.c index 91fe1257d2b2ab736cfd35093a4dd40758004477..4ee2bc159b4697564a0614d8ff72d8235e071e98 100644 --- a/source/dnode/mnode/impl/src/mndMnode.c +++ b/source/dnode/mnode/impl/src/mndMnode.c @@ -319,7 +319,7 @@ static int32_t mndBuildCreateMnodeRedoAction(STrans *pTrans, SDCreateMnodeReq *p return 0; } -static int32_t mndBuildAlterMnodeTypeRedoAction(STrans *pTrans, +static int32_t mndBuildAlterMnodeTypeRedoAction(STrans *pTrans, SDAlterMnodeTypeReq *pAlterMnodeTypeReq, SEpSet *pAlterMnodeTypeEpSet) { int32_t contLen = tSerializeSDCreateMnodeReq(NULL, 0, pAlterMnodeTypeReq); void *pReq = taosMemoryMalloc(contLen); @@ -803,9 +803,17 @@ static int32_t mndRetrieveMnodes(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pB int32_t numOfRows = 0; int32_t cols = 0; SMnodeObj *pObj = NULL; + SMnodeObj *pSelfObj = NULL; ESdbStatus objStatus = 0; char *pWrite; int64_t curMs = taosGetTimestampMs(); + int64_t dummyTimeMs = 0; + + pSelfObj = sdbAcquire(pSdb, SDB_MNODE, &pMnode->selfDnodeId); + if (pSelfObj == NULL) { + mError("mnode:%d, failed to acquire self %s", pMnode->selfDnodeId, terrstr()); + goto _out; + } while (numOfRows < rows) { pShow->pIter = sdbFetchAll(pSdb, SDB_MNODE, pShow->pIter, (void **)&pObj, &objStatus, true); @@ -825,7 +833,8 @@ static int32_t mndRetrieveMnodes(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pB if (pObj->id == 
pMnode->selfDnodeId) { snprintf(role, sizeof(role), "%s%s", syncStr(TAOS_SYNC_STATE_LEADER), pMnode->restored ? "" : "*"); } - if (mndIsDnodeOnline(pObj->pDnode, curMs)) { + bool isDnodeOnline = mndIsDnodeOnline(pObj->pDnode, curMs); + if (isDnodeOnline) { tstrncpy(role, syncStr(pObj->syncState), sizeof(role)); if (pObj->syncState == TAOS_SYNC_STATE_LEADER && pObj->id != pMnode->selfDnodeId) { tstrncpy(role, syncStr(TAOS_SYNC_STATE_ERROR), sizeof(role)); @@ -840,7 +849,7 @@ static int32_t mndRetrieveMnodes(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pB const char *status = "ready"; if (objStatus == SDB_STATUS_CREATING) status = "creating"; if (objStatus == SDB_STATUS_DROPPING) status = "dropping"; - if (!mndIsDnodeOnline(pObj->pDnode, curMs)) status = "offline"; + if (!isDnodeOnline) status = "offline"; char b3[9 + VARSTR_HEADER_SIZE] = {0}; STR_WITH_MAXSIZE_TO_VARSTR(b3, status, pShow->pMeta->pSchemas[cols].bytes); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); @@ -850,7 +859,15 @@ static int32_t mndRetrieveMnodes(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pB colDataSetVal(pColInfo, numOfRows, (const char *)&pObj->createdTime, false); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataSetVal(pColInfo, numOfRows, (const char *)&pObj->stateStartTime, false); + if (pObj->syncTerm != pSelfObj->syncTerm || !isDnodeOnline) { + // state of old term / no status report => use dummyTimeMs + if (pObj->syncTerm > pSelfObj->syncTerm) { + mError("mnode:%d has a newer term:%" PRId64 " than me:%" PRId64, pObj->id, pObj->syncTerm, pSelfObj->syncTerm); + } + colDataSetVal(pColInfo, numOfRows, (const char *)&dummyTimeMs, false); + } else { + colDataSetVal(pColInfo, numOfRows, (const char *)&pObj->roleTimeMs, false); + } numOfRows++; sdbRelease(pSdb, pObj); @@ -858,6 +875,8 @@ static int32_t mndRetrieveMnodes(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pB pShow->numOfRows += numOfRows; +_out: + sdbRelease(pSdb, pSelfObj); return numOfRows; } @@ -999,12 +1018,12 
@@ static void mndReloadSyncConfig(SMnode *pMnode) { } if (pMnode->syncMgmt.sync > 0) { - mInfo("vgId:1, mnode sync reconfig, totalReplica:%d replica:%d myIndex:%d", + mInfo("vgId:1, mnode sync reconfig, totalReplica:%d replica:%d myIndex:%d", cfg.totalReplicaNum, cfg.replicaNum, cfg.myIndex); for (int32_t i = 0; i < cfg.totalReplicaNum; ++i) { SNodeInfo *pNode = &cfg.nodeInfo[i]; - mInfo("vgId:1, index:%d, ep:%s:%u dnode:%d cluster:%" PRId64 " role:%d", i, pNode->nodeFqdn, pNode->nodePort, + mInfo("vgId:1, index:%d, ep:%s:%u dnode:%d cluster:%" PRId64 " role:%d", i, pNode->nodeFqdn, pNode->nodePort, pNode->nodeId, pNode->clusterId, pNode->nodeRole); } diff --git a/source/dnode/mnode/impl/src/mndQnode.c b/source/dnode/mnode/impl/src/mndQnode.c index b5c9ce1f650176f9b6a7c116cd4c1f2616ae8817..5ec81440bbb0fa90c0e858b1b2381e0fcd6b4487 100644 --- a/source/dnode/mnode/impl/src/mndQnode.c +++ b/source/dnode/mnode/impl/src/mndQnode.c @@ -454,6 +454,7 @@ int32_t mndCreateQnodeList(SMnode *pMnode, SArray **pList, int32_t limit) { sdbRelease(pSdb, pObj); if (limit > 0 && numOfRows >= limit) { + sdbCancelFetch(pSdb, pIter); break; } } diff --git a/source/dnode/mnode/impl/src/mndScheduler.c b/source/dnode/mnode/impl/src/mndScheduler.c index 33905bad86c74f9f990512ef06815190a54d720f..2aac05b22d2c126780b479a3cbc2fa255d2f3e5f 100644 --- a/source/dnode/mnode/impl/src/mndScheduler.c +++ b/source/dnode/mnode/impl/src/mndScheduler.c @@ -25,6 +25,7 @@ #define SINK_NODE_LEVEL (0) extern bool tsDeployOnSnode; +static int32_t setTaskUpstreamEpInfo(const SStreamTask* pTask, SStreamTask* pDownstream); static int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SArray* pTaskList, SMnode* pMnode, int32_t vgId, SVgObj* pVgroup, int32_t fillHistory); static void setFixedDownstreamEpInfo(SStreamTask* pDstTask, const SStreamTask* pTask); @@ -87,10 +88,10 @@ END: int32_t mndSetSinkTaskInfo(SStreamObj* pStream, SStreamTask* pTask) { if (pStream->smaId != 0) { - pTask->outputType = TASK_OUTPUT__SMA; + 
pTask->outputInfo.type = TASK_OUTPUT__SMA; pTask->smaSink.smaId = pStream->smaId; } else { - pTask->outputType = TASK_OUTPUT__TABLE; + pTask->outputInfo.type = TASK_OUTPUT__TABLE; pTask->tbSink.stbUid = pStream->targetStbUid; memcpy(pTask->tbSink.stbFullName, pStream->targetSTbName, TSDB_TABLE_FNAME_LEN); pTask->tbSink.pSchemaWrapper = tCloneSSchemaWrapper(&pStream->outputSchema); @@ -110,7 +111,7 @@ int32_t mndAddDispatcherForInternalTask(SMnode* pMnode, SStreamObj* pStream, SAr SDbObj* pDb = mndAcquireDb(pMnode, pStream->targetDb); if (pDb != NULL && pDb->cfg.numOfVgroups > 1) { isShuffle = true; - pTask->outputType = TASK_OUTPUT__SHUFFLE_DISPATCH; + pTask->outputInfo.type = TASK_OUTPUT__SHUFFLE_DISPATCH; pTask->msgInfo.msgType = TDMT_STREAM_TASK_DISPATCH; if (mndExtractDbInfo(pMnode, pDb, &pTask->shuffleDispatcher.dbInfo, NULL) < 0) { return -1; @@ -167,6 +168,7 @@ SSnodeObj* mndSchedFetchOneSnode(SMnode* pMnode) { void* pIter = NULL; // TODO random fetch pIter = sdbFetch(pMnode->pSdb, SDB_SNODE, pIter, (void**)&pObj); + sdbCancelFetch(pMnode->pSdb, pIter); return pObj; } @@ -198,6 +200,7 @@ SVgObj* mndSchedFetchOneVg(SMnode* pMnode, int64_t dbUid) { sdbRelease(pMnode->pSdb, pVgroup); continue; } + sdbCancelFetch(pMnode->pSdb, pIter); return pVgroup; } return pVgroup; @@ -267,10 +270,15 @@ static int32_t addSourceStreamTask(SMnode* pMnode, SVgObj* pVgroup, SArray* pTas return terrno; } + for(int32_t i = 0; i < taosArrayGetSize(pSinkTaskList); ++i) { + SStreamTask* pSinkTask = taosArrayGetP(pSinkTaskList, i); + setTaskUpstreamEpInfo(pTask, pSinkTask); + } + return TSDB_CODE_SUCCESS; } -static SStreamChildEpInfo* createStreamTaskEpInfo(SStreamTask* pTask) { +static SStreamChildEpInfo* createStreamTaskEpInfo(const SStreamTask* pTask) { SStreamChildEpInfo* pEpInfo = taosMemoryMalloc(sizeof(SStreamChildEpInfo)); if (pEpInfo == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -291,11 +299,11 @@ void setFixedDownstreamEpInfo(SStreamTask* pDstTask, const SStreamTask* pTask) 
{ pDispatcher->nodeId = pTask->info.nodeId; pDispatcher->epSet = pTask->info.epSet; - pDstTask->outputType = TASK_OUTPUT__FIXED_DISPATCH; + pDstTask->outputInfo.type = TASK_OUTPUT__FIXED_DISPATCH; pDstTask->msgInfo.msgType = TDMT_STREAM_TASK_DISPATCH; } -int32_t setEpToDownstreamTask(SStreamTask* pTask, SStreamTask* pDownstream) { +int32_t setTaskUpstreamEpInfo(const SStreamTask* pTask, SStreamTask* pDownstream) { SStreamChildEpInfo* pEpInfo = createStreamTaskEpInfo(pTask); if (pEpInfo == NULL) { return TSDB_CODE_OUT_OF_MEMORY; @@ -418,7 +426,7 @@ static int32_t doAddSourceTask(SArray* pTaskList, int8_t fillHistory, int64_t ui return -1; } - return setEpToDownstreamTask(pTask, pDownstreamTask); + return setTaskUpstreamEpInfo(pTask, pDownstreamTask); } static int32_t doAddAggTask(uint64_t uid, SArray* pTaskList, SArray* pSinkNodeList, SMnode* pMnode, SStreamObj* pStream, @@ -586,6 +594,14 @@ static int32_t addSinkTasks(SArray* pTasksList, SMnode* pMnode, SStreamObj* pStr return TSDB_CODE_SUCCESS; } +static void setSinkTaskUpstreamInfo(SArray* pTasksList, const SStreamTask* pUpstreamTask) { + SArray* pSinkTaskList = taosArrayGetP(pTasksList, SINK_NODE_LEVEL); + for(int32_t i = 0; i < taosArrayGetSize(pSinkTaskList); ++i) { + SStreamTask* pSinkTask = taosArrayGetP(pSinkTaskList, i); + setTaskUpstreamEpInfo(pUpstreamTask, pSinkTask); + } +} + static int32_t doScheduleStream(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan, int64_t nextWindowSkey) { SSdb* pSdb = pMnode->pSdb; int32_t numOfPlanLevel = LIST_LENGTH(pPlan->pSubplans); @@ -637,6 +653,9 @@ static int32_t doScheduleStream(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* return code; } + setSinkTaskUpstreamInfo(pStream->tasks, pAggTask); + setSinkTaskUpstreamInfo(pStream->pHTasksList, pHAggTask); + // source level return addSourceTasksForMultiLevelStream(pMnode, pPlan, pStream, pAggTask, pHAggTask, nextWindowSkey); } else if (numOfPlanLevel == 1) { diff --git a/source/dnode/mnode/impl/src/mndShow.c 
b/source/dnode/mnode/impl/src/mndShow.c index c50b205f37f569fced5a690f329ae3ba26c1ac6a..44f47517006c46b5f3cdc69360757402b2e6f352 100644 --- a/source/dnode/mnode/impl/src/mndShow.c +++ b/source/dnode/mnode/impl/src/mndShow.c @@ -20,6 +20,7 @@ #define SHOW_STEP_SIZE 100 #define SHOW_COLS_STEP_SIZE 4096 +#define SHOW_PRIVILEGES_STEP_SIZE 2048 static SShowObj *mndCreateShowObj(SMnode *pMnode, SRetrieveTableReq *pReq); static void mndFreeShowObj(SShowObj *pShow); @@ -234,6 +235,8 @@ static int32_t mndProcessRetrieveSysTableReq(SRpcMsg *pReq) { if(pShow->type == TSDB_MGMT_TABLE_COL){ // expend capacity for ins_columns rowsToRead = SHOW_COLS_STEP_SIZE; + } else if (pShow->type == TSDB_MGMT_TABLE_PRIVILEGES) { + rowsToRead = SHOW_PRIVILEGES_STEP_SIZE; } ShowRetrieveFp retrieveFp = pMgmt->retrieveFps[pShow->type]; if (retrieveFp == NULL) { diff --git a/source/dnode/mnode/impl/src/mndSma.c b/source/dnode/mnode/impl/src/mndSma.c index 6f323b2b427b9187198a227b07500ec9e4f87a18..b84297f6bfd77f33ca2e27b04fd5b2b172a1286c 100644 --- a/source/dnode/mnode/impl/src/mndSma.c +++ b/source/dnode/mnode/impl/src/mndSma.c @@ -504,6 +504,11 @@ static void mndDestroySmaObj(SSmaObj *pSmaObj) { static int32_t mndCreateSma(SMnode *pMnode, SRpcMsg *pReq, SMCreateSmaReq *pCreate, SDbObj *pDb, SStbObj *pStb, const char *streamName) { + if (pDb->cfg.replications > 1) { + terrno = TSDB_CODE_MND_INVALID_SMA_OPTION; + mError("sma:%s, failed to create since not support multiple replicas", pCreate->name); + return -1; + } SSmaObj smaObj = {0}; memcpy(smaObj.name, pCreate->name, TSDB_TABLE_FNAME_LEN); memcpy(smaObj.stb, pStb->name, TSDB_TABLE_FNAME_LEN); diff --git a/source/dnode/mnode/impl/src/mndStb.c b/source/dnode/mnode/impl/src/mndStb.c index d6537ef9926cd01bffd8f2ed985e9d3c7fff7d7f..70fd74afc0a80b327265e17d2cb5f1a8d0069769 100644 --- a/source/dnode/mnode/impl/src/mndStb.c +++ b/source/dnode/mnode/impl/src/mndStb.c @@ -900,7 +900,6 @@ static int32_t mndProcessTtlTimer(SRpcMsg *pReq) { SMsgHead *pHead 
= rpcMallocCont(contLen); if (pHead == NULL) { - sdbCancelFetch(pSdb, pVgroup); sdbRelease(pSdb, pVgroup); continue; } @@ -1240,6 +1239,7 @@ static int32_t mndCheckAlterColForTopic(SMnode *pMnode, const char *stbFullName, terrno = TSDB_CODE_MND_FIELD_CONFLICT_WITH_TOPIC; mError("topic:%s, create ast error", pTopic->name); sdbRelease(pSdb, pTopic); + sdbCancelFetch(pSdb, pIter); return -1; } @@ -1260,6 +1260,7 @@ static int32_t mndCheckAlterColForTopic(SMnode *pMnode, const char *stbFullName, mError("topic:%s, check colId:%d conflicted", pTopic->name, pCol->colId); nodesDestroyNode(pAst); nodesDestroyList(pNodeList); + sdbCancelFetch(pSdb, pIter); sdbRelease(pSdb, pTopic); return -1; } @@ -1287,6 +1288,7 @@ static int32_t mndCheckAlterColForStream(SMnode *pMnode, const char *stbFullName terrno = TSDB_CODE_MND_INVALID_STREAM_OPTION; mError("stream:%s, create ast error", pStream->name); sdbRelease(pSdb, pStream); + sdbCancelFetch(pSdb, pIter); return -1; } @@ -1306,6 +1308,7 @@ static int32_t mndCheckAlterColForStream(SMnode *pMnode, const char *stbFullName nodesDestroyNode(pAst); nodesDestroyList(pNodeList); sdbRelease(pSdb, pStream); + sdbCancelFetch(pSdb, pIter); return -1; } mInfo("stream:%s, check colId:%d passed", pStream->name, pCol->colId); @@ -1335,6 +1338,7 @@ static int32_t mndCheckAlterColForTSma(SMnode *pMnode, const char *stbFullName, terrno = TSDB_CODE_SDB_INVALID_DATA_CONTENT; mError("tsma:%s, check tag and column modifiable, stb:%s suid:%" PRId64 " colId:%d failed since parse AST err", pSma->name, stbFullName, suid, colId); + sdbCancelFetch(pSdb, pIter); return -1; } @@ -1355,6 +1359,7 @@ static int32_t mndCheckAlterColForTSma(SMnode *pMnode, const char *stbFullName, nodesDestroyNode(pAst); nodesDestroyList(pNodeList); sdbRelease(pSdb, pSma); + sdbCancelFetch(pSdb, pIter); return -1; } mInfo("tsma:%s, check colId:%d passed", pSma->name, pCol->colId); @@ -2271,6 +2276,7 @@ static int32_t mndCheckDropStbForTopic(SMnode *pMnode, const char *stbFullName, 
if (pTopic->subType == TOPIC_SUB_TYPE__TABLE) { if (pTopic->stbUid == suid) { sdbRelease(pSdb, pTopic); + sdbCancelFetch(pSdb, pIter); return -1; } } @@ -2285,6 +2291,7 @@ static int32_t mndCheckDropStbForTopic(SMnode *pMnode, const char *stbFullName, terrno = TSDB_CODE_MND_INVALID_TOPIC_OPTION; mError("topic:%s, create ast error", pTopic->name); sdbRelease(pSdb, pTopic); + sdbCancelFetch(pSdb, pIter); return -1; } @@ -2298,6 +2305,7 @@ static int32_t mndCheckDropStbForTopic(SMnode *pMnode, const char *stbFullName, sdbRelease(pSdb, pTopic); nodesDestroyNode(pAst); nodesDestroyList(pNodeList); + sdbCancelFetch(pSdb, pIter); return -1; } else { goto NEXT; @@ -2325,6 +2333,7 @@ static int32_t mndCheckDropStbForStream(SMnode *pMnode, const char *stbFullName, } if (pStream->targetStbUid == suid) { + sdbCancelFetch(pSdb, pIter); sdbRelease(pSdb, pStream); return -1; } @@ -2333,6 +2342,7 @@ static int32_t mndCheckDropStbForStream(SMnode *pMnode, const char *stbFullName, if (nodesStringToNode(pStream->ast, &pAst) != 0) { terrno = TSDB_CODE_MND_INVALID_STREAM_OPTION; mError("stream:%s, create ast error", pStream->name); + sdbCancelFetch(pSdb, pIter); sdbRelease(pSdb, pStream); return -1; } @@ -2344,6 +2354,7 @@ static int32_t mndCheckDropStbForStream(SMnode *pMnode, const char *stbFullName, SColumnNode *pCol = (SColumnNode *)pNode; if (pCol->tableId == suid) { + sdbCancelFetch(pSdb, pIter); sdbRelease(pSdb, pStream); nodesDestroyNode(pAst); nodesDestroyList(pNodeList); @@ -2498,12 +2509,14 @@ static int32_t mndProcessTableCfgReq(SRpcMsg *pReq) { goto _OVER; } - if (0 == strcmp(cfgReq.dbFName, TSDB_INFORMATION_SCHEMA_DB)) { + char dbName[TSDB_DB_NAME_LEN] = {0}; + mndExtractShortDbNameFromDbFullName(cfgReq.dbFName, dbName); + if (0 == strcmp(dbName, TSDB_INFORMATION_SCHEMA_DB)) { mInfo("information_schema table:%s.%s, start to retrieve cfg", cfgReq.dbFName, cfgReq.tbName); if (mndBuildInsTableCfg(pMnode, cfgReq.dbFName, cfgReq.tbName, &cfgRsp) != 0) { goto _OVER; } - } else 
if (0 == strcmp(cfgReq.dbFName, TSDB_PERFORMANCE_SCHEMA_DB)) { + } else if (0 == strcmp(dbName, TSDB_PERFORMANCE_SCHEMA_DB)) { mInfo("performance_schema table:%s.%s, start to retrieve cfg", cfgReq.dbFName, cfgReq.tbName); if (mndBuildPerfsTableCfg(pMnode, cfgReq.dbFName, cfgReq.tbName, &cfgRsp) != 0) { goto _OVER; @@ -2672,6 +2685,13 @@ void mndExtractShortDbNameFromStbFullName(const char *stbFullName, char *dst) { tNameGetDbName(&name, dst); } +void mndExtractShortDbNameFromDbFullName(const char *stbFullName, char *dst) { + SName name = {0}; + tNameFromString(&name, stbFullName, T_NAME_ACCT | T_NAME_DB); + + tNameGetDbName(&name, dst); +} + void mndExtractTbNameFromStbFullName(const char *stbFullName, char *dst, int32_t dstSize) { int32_t pos = -1; int32_t num = 0; diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 9d8948650a23476461e8787e5ba17c8ecb1c99d1..06ab1bb638eb71c092393280d7e0b8648f30a9b6 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -740,12 +740,14 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { if (numOfStream > MND_STREAM_MAX_NUM) { mError("too many streams, no more than %d for each database", MND_STREAM_MAX_NUM); terrno = TSDB_CODE_MND_TOO_MANY_STREAMS; + sdbCancelFetch(pMnode->pSdb, pIter); goto _OVER; } if (pStream->targetStbUid == streamObj.targetStbUid) { mError("Cannot write the same stable as other stream:%s", pStream->name); terrno = TSDB_CODE_MND_INVALID_TARGET_TABLE; + sdbCancelFetch(pMnode->pSdb, pIter); goto _OVER; } } diff --git a/source/dnode/mnode/impl/src/mndSubscribe.c b/source/dnode/mnode/impl/src/mndSubscribe.c index 48de21199b9bc1ae0611b97ba270fa802ac65a8e..6f50b9ff9f626ea505d139f1c31f21960322fc30 100644 --- a/source/dnode/mnode/impl/src/mndSubscribe.c +++ b/source/dnode/mnode/impl/src/mndSubscribe.c @@ -597,7 +597,7 @@ static int32_t mndPersistRebResult(SMnode *pMnode, SRpcMsg *pMsg, const SMqRebOu for (int32_t i = 
0; i < consumerNum; i++) { int64_t consumerId = *(int64_t *)taosArrayGet(pOutput->modifyConsumers, i); SMqConsumerObj *pConsumerNew = tNewSMqConsumerObj(consumerId, cgroup); - pConsumerNew->updateType = CONSUMER_UPDATE_REB_MODIFY_NOTOPIC; + pConsumerNew->updateType = CONSUMER_UPDATE_REB; if (mndSetConsumerCommitLogs(pMnode, pTrans, pConsumerNew) != 0) { tDeleteSMqConsumerObj(pConsumerNew, true); @@ -613,7 +613,7 @@ static int32_t mndPersistRebResult(SMnode *pMnode, SRpcMsg *pMsg, const SMqRebOu for (int32_t i = 0; i < consumerNum; i++) { int64_t consumerId = *(int64_t *)taosArrayGet(pOutput->newConsumers, i); SMqConsumerObj *pConsumerNew = tNewSMqConsumerObj(consumerId, cgroup); - pConsumerNew->updateType = CONSUMER_UPDATE_REB_MODIFY_TOPIC; + pConsumerNew->updateType = CONSUMER_ADD_REB; char* topicTmp = taosStrdup(topic); taosArrayPush(pConsumerNew->rebNewTopics, &topicTmp); @@ -633,7 +633,7 @@ static int32_t mndPersistRebResult(SMnode *pMnode, SRpcMsg *pMsg, const SMqRebOu int64_t consumerId = *(int64_t *)taosArrayGet(pOutput->removedConsumers, i); SMqConsumerObj *pConsumerNew = tNewSMqConsumerObj(consumerId, cgroup); - pConsumerNew->updateType = CONSUMER_UPDATE_REB_MODIFY_REMOVE; + pConsumerNew->updateType = CONSUMER_REMOVE_REB; char* topicTmp = taosStrdup(topic); taosArrayPush(pConsumerNew->rebRemovedTopics, &topicTmp); @@ -692,6 +692,7 @@ static int32_t mndProcessRebalanceReq(SRpcMsg *pMsg) { taosArrayDestroy(rebOutput.modifyConsumers); taosArrayDestroy(rebOutput.rebVgs); + taosHashCancelIterate(pReq->rebSubHash, pIter); terrno = TSDB_CODE_OUT_OF_MEMORY; mInfo("mq re-balance failed, due to out of memory"); taosHashCleanup(pReq->rebSubHash); @@ -1104,6 +1105,7 @@ int32_t mndDropSubByTopic(SMnode *pMnode, STrans *pTrans, const char *topicName) if (taosHashGetSize(pSub->consumerHash) != 0) { sdbRelease(pSdb, pSub); terrno = TSDB_CODE_MND_IN_REBALANCE; + sdbCancelFetch(pSdb, pIter); return -1; } int32_t sz = taosArrayGetSize(pSub->unassignedVgs); @@ -1122,12 
+1124,14 @@ int32_t mndDropSubByTopic(SMnode *pMnode, STrans *pTrans, const char *topicName) if (mndTransAppendRedoAction(pTrans, &action) != 0) { taosMemoryFree(pReq); sdbRelease(pSdb, pSub); + sdbCancelFetch(pSdb, pIter); return -1; } } if (mndSetDropSubRedoLogs(pMnode, pTrans, pSub) < 0) { sdbRelease(pSdb, pSub); + sdbCancelFetch(pSdb, pIter); goto END; } @@ -1165,7 +1169,7 @@ static int32_t buildResult(SSDataBlock *pBlock, int32_t* numOfRows, int64_t cons pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataSetVal(pColInfo, *numOfRows, (const char *)consumerIdHex, consumerId == -1); - mDebug("mnd show subscriptions: topic %s, consumer:0x%" PRIx64 " cgroup %s vgid %d", varDataVal(topic), + mInfo("mnd show subscriptions: topic %s, consumer:0x%" PRIx64 " cgroup %s vgid %d", varDataVal(topic), consumerId, varDataVal(cgroup), pVgEp->vgId); // offset @@ -1204,7 +1208,7 @@ int32_t mndRetrieveSubscribe(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock int32_t numOfRows = 0; SMqSubscribeObj *pSub = NULL; - mDebug("mnd show subscriptions begin"); + mInfo("mnd show subscriptions begin"); while (numOfRows < rowsCapacity) { pShow->pIter = sdbFetch(pSdb, SDB_SUBSCRIBE, pShow->pIter, (void **)&pSub); @@ -1244,7 +1248,7 @@ int32_t mndRetrieveSubscribe(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock sdbRelease(pSdb, pSub); } - mDebug("mnd end show subscriptions"); + mInfo("mnd end show subscriptions"); pShow->numOfRows += numOfRows; return numOfRows; diff --git a/source/dnode/mnode/impl/src/mndTopic.c b/source/dnode/mnode/impl/src/mndTopic.c index 485823edf3547309c2f57313415b575c961b4206..85e6f1caf6007ca53da9b08c2ad07b8147121516 100644 --- a/source/dnode/mnode/impl/src/mndTopic.c +++ b/source/dnode/mnode/impl/src/mndTopic.c @@ -513,6 +513,7 @@ static int32_t mndCreateTopic(SMnode *pMnode, SRpcMsg *pReq, SCMCreateTopicReq * tEncodeSize(tEncodeSTqCheckInfo, &info, len, code); if (code < 0) { sdbRelease(pSdb, pVgroup); + sdbCancelFetch(pSdb, pIter); goto _OUT; } void 
*buf = taosMemoryCalloc(1, sizeof(SMsgHead) + len); @@ -522,6 +523,7 @@ static int32_t mndCreateTopic(SMnode *pMnode, SRpcMsg *pReq, SCMCreateTopicReq * if (tEncodeSTqCheckInfo(&encoder, &info) < 0) { taosMemoryFree(buf); sdbRelease(pSdb, pVgroup); + sdbCancelFetch(pSdb, pIter); goto _OUT; } tEncoderClear(&encoder); @@ -535,6 +537,7 @@ static int32_t mndCreateTopic(SMnode *pMnode, SRpcMsg *pReq, SCMCreateTopicReq * if (mndTransAppendRedoAction(pTrans, &action) != 0) { taosMemoryFree(buf); sdbRelease(pSdb, pVgroup); + sdbCancelFetch(pSdb, pIter); goto _OUT; } buf = NULL; @@ -647,7 +650,6 @@ static int32_t mndDropTopic(SMnode *pMnode, STrans *pTrans, SRpcMsg *pReq, SMqTo code = 0; _OVER: - mndTransDrop(pTrans); return code; } @@ -698,6 +700,7 @@ static int32_t mndProcessDropTopicReq(SRpcMsg *pReq) { if (strcmp(name, pTopic->name) == 0) { mndReleaseConsumer(pMnode, pConsumer); mndReleaseTopic(pMnode, pTopic); + sdbCancelFetch(pSdb, pIter); terrno = TSDB_CODE_MND_TOPIC_SUBSCRIBED; mError("topic:%s, failed to drop since subscribed by consumer:0x%" PRIx64 ", in consumer group %s", dropReq.name, pConsumer->consumerId, pConsumer->cgroup); @@ -711,6 +714,7 @@ static int32_t mndProcessDropTopicReq(SRpcMsg *pReq) { if (strcmp(name, pTopic->name) == 0) { mndReleaseConsumer(pMnode, pConsumer); mndReleaseTopic(pMnode, pTopic); + sdbCancelFetch(pSdb, pIter); terrno = TSDB_CODE_MND_TOPIC_SUBSCRIBED; mError("topic:%s, failed to drop since subscribed by consumer:%" PRId64 ", in consumer group %s (reb new)", dropReq.name, pConsumer->consumerId, pConsumer->cgroup); @@ -724,6 +728,7 @@ static int32_t mndProcessDropTopicReq(SRpcMsg *pReq) { if (strcmp(name, pTopic->name) == 0) { mndReleaseConsumer(pMnode, pConsumer); mndReleaseTopic(pMnode, pTopic); + sdbCancelFetch(pSdb, pIter); terrno = TSDB_CODE_MND_TOPIC_SUBSCRIBED; mError("topic:%s, failed to drop since subscribed by consumer:%" PRId64 ", in consumer group %s (reb remove)", dropReq.name, pConsumer->consumerId, pConsumer->cgroup); 
@@ -735,6 +740,7 @@ static int32_t mndProcessDropTopicReq(SRpcMsg *pReq) { } if (mndCheckDbPrivilegeByName(pMnode, pReq->info.conn.user, MND_OPER_READ_DB, pTopic->db) != 0) { + mndReleaseTopic(pMnode, pTopic); return -1; } @@ -788,6 +794,8 @@ static int32_t mndProcessDropTopicReq(SRpcMsg *pReq) { if (mndTransAppendRedoAction(pTrans, &action) != 0) { taosMemoryFree(buf); sdbRelease(pSdb, pVgroup); + mndReleaseTopic(pMnode, pTopic); + sdbCancelFetch(pSdb, pIter); mndTransDrop(pTrans); return -1; } @@ -796,6 +804,7 @@ static int32_t mndProcessDropTopicReq(SRpcMsg *pReq) { int32_t code = mndDropTopic(pMnode, pTrans, pReq, pTopic); mndReleaseTopic(pMnode, pTopic); + mndTransDrop(pTrans); if (code != 0) { mError("topic:%s, failed to drop since %s", dropReq.name, terrstr()); @@ -999,6 +1008,7 @@ bool mndTopicExistsForDb(SMnode *pMnode, SDbObj *pDb) { if (pTopic->dbUid == pDb->uid) { sdbRelease(pSdb, pTopic); + sdbCancelFetch(pSdb, pIter); return true; } diff --git a/source/dnode/mnode/impl/src/mndUser.c b/source/dnode/mnode/impl/src/mndUser.c index 1fc2e42b8ca9be737adfb60e8312c390dbf3f5b7..65c1cfbea2240d2347eafd79add3bcc740621e85 100644 --- a/source/dnode/mnode/impl/src/mndUser.c +++ b/source/dnode/mnode/impl/src/mndUser.c @@ -630,6 +630,11 @@ static int32_t mndProcessCreateUserReq(SRpcMsg *pReq) { goto _OVER; } + if (strlen(createReq.pass) >= TSDB_PASSWORD_LEN){ + terrno = TSDB_CODE_PAR_NAME_OR_PASSWD_TOO_LONG; + goto _OVER; + } + pUser = mndAcquireUser(pMnode, createReq.user); if (pUser != NULL) { terrno = TSDB_CODE_MND_USER_ALREADY_EXIST; @@ -922,19 +927,19 @@ static int32_t mndProcessAlterUserReq(SRpcMsg *pReq) { } } - if (alterReq.alterType == TSDB_ALTER_USER_ADD_READ_TABLE) { + if (alterReq.alterType == TSDB_ALTER_USER_ADD_READ_TABLE || alterReq.alterType == TSDB_ALTER_USER_ADD_ALL_TABLE) { if (mndTablePriviledge(pMnode, newUser.readTbs, newUser.useDbs, &alterReq, pSdb) != 0) goto _OVER; } - if (alterReq.alterType == TSDB_ALTER_USER_ADD_WRITE_TABLE) { + if 
(alterReq.alterType == TSDB_ALTER_USER_ADD_WRITE_TABLE || alterReq.alterType == TSDB_ALTER_USER_ADD_ALL_TABLE) { if (mndTablePriviledge(pMnode, newUser.writeTbs, newUser.useDbs, &alterReq, pSdb) != 0) goto _OVER; } - if (alterReq.alterType == TSDB_ALTER_USER_REMOVE_READ_TABLE) { + if (alterReq.alterType == TSDB_ALTER_USER_REMOVE_READ_TABLE || alterReq.alterType == TSDB_ALTER_USER_REMOVE_ALL_TABLE) { if (mndRemoveTablePriviledge(pMnode, newUser.readTbs, newUser.useDbs, &alterReq, pSdb) != 0) goto _OVER; } - if (alterReq.alterType == TSDB_ALTER_USER_REMOVE_WRITE_TABLE) { + if (alterReq.alterType == TSDB_ALTER_USER_REMOVE_WRITE_TABLE || alterReq.alterType == TSDB_ALTER_USER_REMOVE_ALL_TABLE) { if (mndRemoveTablePriviledge(pMnode, newUser.writeTbs, newUser.useDbs, &alterReq, pSdb) != 0) goto _OVER; } @@ -1169,26 +1174,30 @@ static void mndLoopHash(SHashObj *hash, char *priType, SSDataBlock *pBlock, int3 if (strcmp("t", value) != 0) { SNode *pAst = NULL; int32_t sqlLen = 0; - char sql[TSDB_EXPLAIN_RESULT_ROW_SIZE] = {0}; + size_t bufSz = strlen(value) + 1; + char* sql = taosMemoryMalloc(bufSz + 1); + char* obj = taosMemoryMalloc(TSDB_PRIVILEDGE_CONDITION_LEN + VARSTR_HEADER_SIZE); - if (nodesStringToNode(value, &pAst) == 0) { - nodesNodeToSQL(pAst, sql, TSDB_EXPLAIN_RESULT_ROW_SIZE, &sqlLen); + if (sql != NULL && obj != NULL && nodesStringToNode(value, &pAst) == 0) { + nodesNodeToSQL(pAst, sql, bufSz, &sqlLen); nodesDestroyNode(pAst); } else { sqlLen = 5; sprintf(sql, "error"); } - char obj[TSDB_PRIVILEDGE_CONDITION_LEN + VARSTR_HEADER_SIZE] = {0}; STR_WITH_MAXSIZE_TO_VARSTR(obj, sql, pShow->pMeta->pSchemas[cols].bytes); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataSetVal(pColInfo, *numOfRows, (const char *)obj, false); + taosMemoryFree(obj); + taosMemoryFree(sql); } else { - char condition[TSDB_PRIVILEDGE_CONDITION_LEN + VARSTR_HEADER_SIZE] = {0}; + char* condition = taosMemoryMalloc(TSDB_PRIVILEDGE_CONDITION_LEN + VARSTR_HEADER_SIZE); 
STR_WITH_MAXSIZE_TO_VARSTR(condition, "", pShow->pMeta->pSchemas[cols].bytes); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataSetVal(pColInfo, *numOfRows, (const char *)condition, false); + taosMemoryFree(condition); } (*numOfRows)++; @@ -1204,16 +1213,34 @@ static int32_t mndRetrievePrivileges(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock int32_t cols = 0; char *pWrite; + bool fetchNextUser = pShow->restore ? false : true; + pShow->restore = false; + while (numOfRows < rows) { - pShow->pIter = sdbFetch(pSdb, SDB_USER, pShow->pIter, (void **)&pUser); - if (pShow->pIter == NULL) break; + if (fetchNextUser) { + pShow->pIter = sdbFetch(pSdb, SDB_USER, pShow->pIter, (void **)&pUser); + if (pShow->pIter == NULL) break; + } else { + fetchNextUser = true; + void *pKey = taosHashGetKey(pShow->pIter, NULL); + pUser = sdbAcquire(pSdb, SDB_USER, pKey); + if (!pUser) { + continue; + } + } int32_t numOfReadDbs = taosHashGetSize(pUser->readDbs); int32_t numOfWriteDbs = taosHashGetSize(pUser->writeDbs); int32_t numOfTopics = taosHashGetSize(pUser->topics); int32_t numOfReadTbs = taosHashGetSize(pUser->readTbs); int32_t numOfWriteTbs = taosHashGetSize(pUser->writeTbs); - if (numOfRows + numOfReadDbs + numOfWriteDbs + numOfTopics + numOfReadTbs + numOfWriteTbs >= rows) break; + if (numOfRows + numOfReadDbs + numOfWriteDbs + numOfTopics + numOfReadTbs + numOfWriteTbs >= rows) { + mInfo("will restore. 
current num of rows: %d, read dbs %d, write dbs %d, topics %d, read tables %d, write tables %d", + numOfRows, numOfReadDbs, numOfWriteDbs, numOfTopics, numOfReadTbs, numOfWriteTbs); + pShow->restore = true; + sdbRelease(pSdb, pUser); + break; + } if (pUser->superUser) { cols = 0; @@ -1237,10 +1264,11 @@ static int32_t mndRetrievePrivileges(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataSetVal(pColInfo, numOfRows, (const char *)tableName, false); - char condition[TSDB_PRIVILEDGE_CONDITION_LEN + VARSTR_HEADER_SIZE] = {0}; + char* condition = taosMemoryMalloc(TSDB_PRIVILEDGE_CONDITION_LEN + VARSTR_HEADER_SIZE); STR_WITH_MAXSIZE_TO_VARSTR(condition, "", pShow->pMeta->pSchemas[cols].bytes); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataSetVal(pColInfo, numOfRows, (const char *)condition, false); + taosMemoryFree(condition); numOfRows++; } @@ -1271,10 +1299,11 @@ static int32_t mndRetrievePrivileges(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataSetVal(pColInfo, numOfRows, (const char *)tableName, false); - char condition[TSDB_PRIVILEDGE_CONDITION_LEN + VARSTR_HEADER_SIZE] = {0}; + char* condition = taosMemoryMalloc(TSDB_PRIVILEDGE_CONDITION_LEN + VARSTR_HEADER_SIZE); STR_WITH_MAXSIZE_TO_VARSTR(condition, "", pShow->pMeta->pSchemas[cols].bytes); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataSetVal(pColInfo, numOfRows, (const char *)condition, false); + taosMemoryFree(condition); numOfRows++; db = taosHashIterate(pUser->readDbs, db); @@ -1306,10 +1335,11 @@ static int32_t mndRetrievePrivileges(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataSetVal(pColInfo, numOfRows, (const char *)tableName, false); - char condition[TSDB_PRIVILEDGE_CONDITION_LEN + VARSTR_HEADER_SIZE] = {0}; + char* condition = taosMemoryMalloc(TSDB_PRIVILEDGE_CONDITION_LEN + VARSTR_HEADER_SIZE); 
STR_WITH_MAXSIZE_TO_VARSTR(condition, "", pShow->pMeta->pSchemas[cols].bytes); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataSetVal(pColInfo, numOfRows, (const char *)condition, false); + taosMemoryFree(condition); numOfRows++; db = taosHashIterate(pUser->writeDbs, db); @@ -1343,10 +1373,11 @@ static int32_t mndRetrievePrivileges(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataSetVal(pColInfo, numOfRows, (const char *)tableName, false); - char condition[TSDB_PRIVILEDGE_CONDITION_LEN + VARSTR_HEADER_SIZE] = {0}; + char* condition = taosMemoryMalloc(TSDB_PRIVILEDGE_CONDITION_LEN + VARSTR_HEADER_SIZE); STR_WITH_MAXSIZE_TO_VARSTR(condition, "", pShow->pMeta->pSchemas[cols].bytes); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataSetVal(pColInfo, numOfRows, (const char *)condition, false); + taosMemoryFree(condition); numOfRows++; topic = taosHashIterate(pUser->topics, topic); @@ -1444,7 +1475,9 @@ int32_t mndUserRemoveDb(SMnode *pMnode, STrans *pTrans, char *db) { if (pIter == NULL) break; code = -1; - if (mndUserDupObj(pUser, &newUser) != 0) break; + if (mndUserDupObj(pUser, &newUser) != 0) { + break; + } bool inRead = (taosHashGet(newUser.readDbs, db, len) != NULL); bool inWrite = (taosHashGet(newUser.writeDbs, db, len) != NULL); @@ -1453,7 +1486,9 @@ int32_t mndUserRemoveDb(SMnode *pMnode, STrans *pTrans, char *db) { (void)taosHashRemove(newUser.writeDbs, db, len); SSdbRaw *pCommitRaw = mndUserActionEncode(&newUser); - if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) break; + if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { + break; + } (void)sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY); } @@ -1491,7 +1526,9 @@ int32_t mndUserRemoveTopic(SMnode *pMnode, STrans *pTrans, char *topic) { if (inTopic) { (void)taosHashRemove(newUser.topics, topic, len); SSdbRaw *pCommitRaw = mndUserActionEncode(&newUser); - if (pCommitRaw == NULL || 
mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) break; + if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { + break; + } (void)sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY); } diff --git a/source/dnode/mnode/impl/src/mndVgroup.c b/source/dnode/mnode/impl/src/mndVgroup.c index 186927fdc6d2977a61377dc455598f8ceb3bab0b..1b5826e7cc09b93f34ede24ff181525f4950ed73 100644 --- a/source/dnode/mnode/impl/src/mndVgroup.c +++ b/source/dnode/mnode/impl/src/mndVgroup.c @@ -2939,6 +2939,7 @@ static int32_t mndProcessBalanceVgroupMsg(SRpcMsg *pReq) { pIter = sdbFetch(pMnode->pSdb, SDB_DNODE, pIter, (void **)&pDnode); if (pIter == NULL) break; if (!mndIsDnodeOnline(pDnode, curMs)) { + sdbCancelFetch(pMnode->pSdb, pIter); terrno = TSDB_CODE_MND_HAS_OFFLINE_DNODE; mError("failed to balance vgroup since %s, dnode:%d", terrstr(), pDnode->id); sdbRelease(pMnode->pSdb, pDnode); diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index e4bc184be32fcb2a5bfd45768d27322e4799e5da..7235a56691ce580d8cb67d923245e26fdb53c82f 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -66,14 +66,15 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t ver) { pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE; pTask->inputQueue = streamQueueOpen(512 << 10); - pTask->outputQueue = streamQueueOpen(512 << 10); + pTask->outputInfo.queue = streamQueueOpen(512 << 10); - if (pTask->inputQueue == NULL || pTask->outputQueue == NULL) { + if (pTask->inputQueue == NULL || pTask->outputInfo.queue == NULL) { return -1; } + pTask->tsInfo.init = taosGetTimestampMs(); pTask->inputStatus = TASK_INPUT_STATUS__NORMAL; - pTask->outputStatus = TASK_OUTPUT_STATUS__NORMAL; + pTask->outputInfo.status = TASK_OUTPUT_STATUS__NORMAL; pTask->pMsgCb = &pSnode->msgCb; pTask->chkInfo.version = ver; pTask->pMeta = pSnode->pMeta; @@ -90,6 +91,7 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t ver) { 
pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, 0); ASSERT(pTask->exec.pExecutor); + taosThreadMutexInit(&pTask->lock, NULL); streamSetupScheduleTrigger(pTask); qDebug("snode:%d expand stream task on snode, s-task:%s, checkpoint ver:%" PRId64 " child id:%d, level:%d", SNODE_HANDLE, @@ -158,7 +160,9 @@ int32_t sndProcessTaskDeployReq(SSnode *pSnode, char *msg, int32_t msgLen) { // 2.save task taosWLockLatch(&pSnode->pMeta->lock); - code = streamMetaAddDeployedTask(pSnode->pMeta, -1, pTask); + + bool added = false; + code = streamMetaRegisterTask(pSnode->pMeta, -1, pTask, &added); if (code < 0) { taosWUnLockLatch(&pSnode->pMeta->lock); return -1; @@ -166,11 +170,10 @@ int32_t sndProcessTaskDeployReq(SSnode *pSnode, char *msg, int32_t msgLen) { int32_t numOfTasks = streamMetaGetNumOfTasks(pSnode->pMeta); taosWUnLockLatch(&pSnode->pMeta->lock); - - streamPrepareNdoCheckDownstream(pTask); qDebug("snode:%d s-task:%s is deployed on snode and add into meta, status:%s, numOfTasks:%d", SNODE_HANDLE, pTask->id.idStr, - streamGetTaskStatusStr(pTask->status.taskStatus), numOfTasks); + streamGetTaskStatusStr(pTask->status.taskStatus), numOfTasks); + streamTaskCheckDownstreamTasks(pTask); return 0; } @@ -178,7 +181,14 @@ int32_t sndProcessTaskDropReq(SSnode *pSnode, char *msg, int32_t msgLen) { SVDropStreamTaskReq *pReq = (SVDropStreamTaskReq *)msg; qDebug("snode:%d receive msg to drop stream task:0x%x", pSnode->pMeta->vgId, pReq->taskId); - streamMetaRemoveTask(pSnode->pMeta, pReq->taskId); + SStreamTask* pTask = streamMetaAcquireTask(pSnode->pMeta, pReq->taskId); + if (pTask == NULL) { + qError("vgId:%d failed to acquire s-task:0x%x when dropping it", pSnode->pMeta->vgId, pReq->taskId); + return 0; + } + + streamMetaUnregisterTask(pSnode->pMeta, pReq->taskId); + streamMetaReleaseTask(pSnode->pMeta, pTask); return 0; } @@ -274,7 +284,7 @@ int32_t sndProcessWriteMsg(SSnode *pSnode, SRpcMsg *pMsg, SRpcMsg *pRsp) { return 0; } -int32_t 
sndProcessTaskRecoverFinishReq(SSnode *pSnode, SRpcMsg *pMsg) { +int32_t sndProcessStreamTaskScanHistoryFinishReq(SSnode *pSnode, SRpcMsg *pMsg) { char *msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); @@ -287,12 +297,12 @@ int32_t sndProcessTaskRecoverFinishReq(SSnode *pSnode, SRpcMsg *pMsg) { tDecoderClear(&decoder); // find task - SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, req.taskId); + SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, req.downstreamTaskId); if (pTask == NULL) { return -1; } // do process request - if (streamProcessScanHistoryFinishReq(pTask, req.taskId, req.childId) < 0) { + if (streamProcessScanHistoryFinishReq(pTask, &req, &pMsg->info) < 0) { streamMetaReleaseTask(pSnode->pMeta, pTask); return -1; } @@ -336,9 +346,9 @@ int32_t sndProcessStreamTaskCheckReq(SSnode *pSnode, SRpcMsg *pMsg) { rsp.status = streamTaskCheckStatus(pTask); streamMetaReleaseTask(pSnode->pMeta, pTask); - qDebug("s-task:%s recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), status:%s, rsp status %d", - pTask->id.idStr, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, - streamGetTaskStatusStr(pTask->status.taskStatus), rsp.status); + const char* pStatus = streamGetTaskStatusStr(pTask->status.taskStatus); + qDebug("s-task:%s status:%s, recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), ready:%d", + pTask->id.idStr, pStatus, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); } else { rsp.status = 0; qDebug("tq recv task check(taskId:0x%x not built yet) req(reqId:0x%" PRIx64 @@ -415,7 +425,7 @@ int32_t sndProcessStreamMsg(SSnode *pSnode, SRpcMsg *pMsg) { case TDMT_STREAM_RETRIEVE_RSP: return sndProcessTaskRetrieveRsp(pSnode, pMsg); case TDMT_STREAM_SCAN_HISTORY_FINISH: - return sndProcessTaskRecoverFinishReq(pSnode, pMsg); + return sndProcessStreamTaskScanHistoryFinishReq(pSnode, pMsg); case TDMT_STREAM_SCAN_HISTORY_FINISH_RSP: return 
sndProcessTaskRecoverFinishRsp(pSnode, pMsg); case TDMT_STREAM_TASK_CHECK: diff --git a/source/dnode/snode/src/snodeInitApi.c b/source/dnode/snode/src/snodeInitApi.c index c046505630251092923189eadbc532e87970e4b6..e737e3fa373ed621ec87c3267ce0cb964ae0c19e 100644 --- a/source/dnode/snode/src/snodeInitApi.c +++ b/source/dnode/snode/src/snodeInitApi.c @@ -78,6 +78,8 @@ void initStateStoreAPI(SStateStore* pStore) { pStore->updateInfoIsUpdated = updateInfoIsUpdated; pStore->updateInfoIsTableInserted = updateInfoIsTableInserted; pStore->updateInfoDestroy = updateInfoDestroy; + pStore->windowSBfDelete = windowSBfDelete; + pStore->windowSBfAdd = windowSBfAdd; pStore->updateInfoInitP = updateInfoInitP; pStore->updateInfoAddCloseWindowSBF = updateInfoAddCloseWindowSBF; diff --git a/source/dnode/vnode/CMakeLists.txt b/source/dnode/vnode/CMakeLists.txt index a3ccc720d9eaef5efd60292d8c27a6c168a7649c..194ffa16f671b8b63466d3bb1de559b470547de4 100644 --- a/source/dnode/vnode/CMakeLists.txt +++ b/source/dnode/vnode/CMakeLists.txt @@ -1,21 +1,18 @@ # vnode add_library(vnode STATIC "") -target_sources( - vnode - PRIVATE - - # vnode - "src/vnd/vnodeOpen.c" - "src/vnd/vnodeBufPool.c" - "src/vnd/vnodeCfg.c" - "src/vnd/vnodeCommit.c" - "src/vnd/vnodeQuery.c" - "src/vnd/vnodeModule.c" - "src/vnd/vnodeSvr.c" - "src/vnd/vnodeSync.c" - "src/vnd/vnodeSnapshot.c" - "src/vnd/vnodeRetention.c" - "src/vnd/vnodeInitApi.c" +set( + VNODE_SOURCE_FILES + "src/vnd/vnodeOpen.c" + "src/vnd/vnodeBufPool.c" + "src/vnd/vnodeCfg.c" + "src/vnd/vnodeCommit.c" + "src/vnd/vnodeQuery.c" + "src/vnd/vnodeModule.c" + "src/vnd/vnodeSvr.c" + "src/vnd/vnodeSync.c" + "src/vnd/vnodeSnapshot.c" + "src/vnd/vnodeRetention.c" + "src/vnd/vnodeInitApi.c" # meta "src/meta/metaOpen.c" @@ -38,23 +35,23 @@ target_sources( "src/sma/smaSnapshot.c" "src/sma/smaTimeRange.c" - # tsdb - "src/tsdb/tsdbCommit.c" - "src/tsdb/tsdbFile.c" - "src/tsdb/tsdbFS.c" - "src/tsdb/tsdbOpen.c" - "src/tsdb/tsdbMemTable.c" - "src/tsdb/tsdbRead.c" - 
"src/tsdb/tsdbCache.c" - "src/tsdb/tsdbWrite.c" - "src/tsdb/tsdbReaderWriter.c" - "src/tsdb/tsdbUtil.c" - "src/tsdb/tsdbSnapshot.c" - "src/tsdb/tsdbCacheRead.c" - "src/tsdb/tsdbRetention.c" - "src/tsdb/tsdbDiskData.c" - "src/tsdb/tsdbMergeTree.c" - "src/tsdb/tsdbDataIter.c" + # # tsdb + # "src/tsdb/tsdbCommit.c" + # "src/tsdb/tsdbFile.c" + # "src/tsdb/tsdbFS.c" + # "src/tsdb/tsdbOpen.c" + # "src/tsdb/tsdbMemTable.c" + # "src/tsdb/tsdbRead.c" + # "src/tsdb/tsdbCache.c" + # "src/tsdb/tsdbWrite.c" + # "src/tsdb/tsdbReaderWriter.c" + # "src/tsdb/tsdbUtil.c" + # "src/tsdb/tsdbSnapshot.c" + # "src/tsdb/tsdbCacheRead.c" + # "src/tsdb/tsdbRetention.c" + # "src/tsdb/tsdbDiskData.c" + # "src/tsdb/tsdbMergeTree.c" + # "src/tsdb/tsdbDataIter.c" # tq "src/tq/tq.c" @@ -71,6 +68,19 @@ target_sources( "src/tq/tqOffsetSnapshot.c" ) +aux_source_directory("src/tsdb/" TSDB_SOURCE_FILES) +list( + APPEND + VNODE_SOURCE_FILES + ${TSDB_SOURCE_FILES} +) + +target_sources( + vnode + PRIVATE + ${VNODE_SOURCE_FILES} +) + IF (TD_VNODE_PLUGINS) target_sources( vnode diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index 38216e1414decf8cabb68ac14c6bbe9051fa1200..a7ce18198dfcd4fe93675a936d43585b83a43d79 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -168,6 +168,27 @@ uint64_t tsdbGetReaderMaxVersion(STsdbReader *pReader); void tsdbReaderSetCloseFlag(STsdbReader *pReader); int64_t tsdbGetLastTimestamp(SVnode *pVnode, void *pTableList, int32_t numOfTables, const char *pIdStr); +//====================================================================================================================== +int32_t tsdbReaderOpen2(void *pVnode, SQueryTableDataCond *pCond, void *pTableList, int32_t numOfTables, + SSDataBlock *pResBlock, void **ppReader, const char *idstr, bool countOnly, + SHashObj **pIgnoreTables); +int32_t tsdbSetTableList2(STsdbReader *pReader, const void *pTableList, int32_t num); +void tsdbReaderSetId2(STsdbReader *pReader, 
const char *idstr); +void tsdbReaderClose2(STsdbReader *pReader); +int32_t tsdbNextDataBlock2(STsdbReader *pReader, bool *hasNext); +int32_t tsdbRetrieveDatablockSMA2(STsdbReader *pReader, SSDataBlock *pDataBlock, bool *allHave, bool *hasNullSMA); +void tsdbReleaseDataBlock2(STsdbReader *pReader); +SSDataBlock *tsdbRetrieveDataBlock2(STsdbReader *pTsdbReadHandle, SArray *pColumnIdList); +int32_t tsdbReaderReset2(STsdbReader *pReader, SQueryTableDataCond *pCond); +int32_t tsdbGetFileBlocksDistInfo2(STsdbReader *pReader, STableBlockDistInfo *pTableBlockInfo); +int64_t tsdbGetNumOfRowsInMemTable2(STsdbReader *pHandle); +void *tsdbGetIdx2(SMeta *pMeta); +void *tsdbGetIvtIdx2(SMeta *pMeta); +uint64_t tsdbGetReaderMaxVersion2(STsdbReader *pReader); +void tsdbReaderSetCloseFlag2(STsdbReader *pReader); +int64_t tsdbGetLastTimestamp2(SVnode *pVnode, void *pTableList, int32_t numOfTables, const char *pIdStr); +//====================================================================================================================== + int32_t tsdbReuseCacherowsReader(void *pReader, void *pTableIdList, int32_t numOfTables); int32_t tsdbCacherowsReaderOpen(void *pVnode, int32_t type, void *pTableIdList, int32_t numOfTables, int32_t numOfCols, SArray *pCidList, int32_t *pSlotIds, uint64_t suid, void **pReader, const char *idstr); @@ -220,7 +241,7 @@ bool tqNextBlockImpl(STqReader *pReader, const char *idstr); SWalReader* tqGetWalReader(STqReader* pReader); SSDataBlock* tqGetResultBlock (STqReader* pReader); -int32_t extractMsgFromWal(SWalReader *pReader, void **pItem, const char *id); +int32_t extractMsgFromWal(SWalReader *pReader, void **pItem, int64_t maxVer, const char *id); int32_t tqReaderSetSubmitMsg(STqReader *pReader, void *msgStr, int32_t msgLen, int64_t ver); bool tqNextDataBlockFilterOut(STqReader *pReader, SHashObj *filterOutUids); int32_t tqRetrieveDataBlock(STqReader *pReader, SSDataBlock** pRes, const char* idstr); diff --git a/source/dnode/vnode/src/inc/tq.h 
b/source/dnode/vnode/src/inc/tq.h index b77bb54714c09254b61746f807e2e04e26a8d4cc..13b991e0386e8419b542f0be8406fa7cdd20254d 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -172,7 +172,6 @@ int32_t tqStreamTasksStatusCheck(STQ* pTq); // tq util int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, SStreamRefDataBlock** pRefBlock); -int32_t tqAddInputBlockNLaunchTask(SStreamTask* pTask, SStreamQueueItem* pQueueItem); int32_t tqExtractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg); int32_t tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t epoch, int64_t consumerId, int32_t type, int64_t sver, int64_t ever); diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index 71af1697528409c6423a4453b0777215bdd74126..fa42248c695a522b21e71fac00bebe7f7a66bae0 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -16,6 +16,9 @@ #ifndef _TD_VNODE_TSDB_H_ #define _TD_VNODE_TSDB_H_ +// #include "../tsdb/tsdbFile2.h" +// #include "../tsdb/tsdbMerge.h" +// #include "../tsdb/tsdbSttFileRW.h" #include "tsimplehash.h" #include "vnodeInt.h" @@ -75,9 +78,8 @@ typedef struct STsdbFilterInfo STsdbFilterInfo; #define TSDBROW_ROW_FMT ((int8_t)0x0) #define TSDBROW_COL_FMT ((int8_t)0x1) -#define TSDB_FILE_DLMT ((uint32_t)0xF00AFA0F) -#define TSDB_MAX_SUBBLOCKS 8 -#define TSDB_FHDR_SIZE 512 +#define TSDB_FILE_DLMT ((uint32_t)0xF00AFA0F) +#define TSDB_FHDR_SIZE 512 #define VERSION_MIN 0 #define VERSION_MAX INT64_MAX @@ -165,6 +167,7 @@ void tBlockDataDestroy(SBlockData *pBlockData); int32_t tBlockDataInit(SBlockData *pBlockData, TABLEID *pId, STSchema *pTSchema, int16_t *aCid, int32_t nCid); void tBlockDataReset(SBlockData *pBlockData); int32_t tBlockDataAppendRow(SBlockData *pBlockData, TSDBROW *pRow, STSchema *pTSchema, int64_t uid); +int32_t tBlockDataUpdateRow(SBlockData *pBlockData, TSDBROW *pRow, 
STSchema *pTSchema); int32_t tBlockDataTryUpsertRow(SBlockData *pBlockData, TSDBROW *pRow, int64_t uid); int32_t tBlockDataUpsertRow(SBlockData *pBlockData, TSDBROW *pRow, STSchema *pTSchema, int64_t uid); void tBlockDataClear(SBlockData *pBlockData); @@ -198,7 +201,7 @@ int32_t tMapDataToArray(SMapData *pMapData, int32_t itemSize, int32_t (*tGetItem // other int32_t tsdbKeyFid(TSKEY key, int32_t minutes, int8_t precision); void tsdbFidKeyRange(int32_t fid, int32_t minutes, int8_t precision, TSKEY *minKey, TSKEY *maxKey); -int32_t tsdbFidLevel(int32_t fid, STsdbKeepCfg *pKeepCfg, int64_t now); +int32_t tsdbFidLevel(int32_t fid, STsdbKeepCfg *pKeepCfg, int64_t nowSec); int32_t tsdbBuildDeleteSkyline(SArray *aDelData, int32_t sidx, int32_t eidx, SArray *aSkyline); int32_t tPutColumnDataAgg(uint8_t *p, SColumnDataAgg *pColAgg); int32_t tGetColumnDataAgg(uint8_t *p, SColumnDataAgg *pColAgg); @@ -302,8 +305,11 @@ int32_t tsdbReadDelIdx(SDelFReader *pReader, SArray *aDelIdx); // tsdbRead.c ============================================================================================== int32_t tsdbTakeReadSnap(STsdbReader *pReader, _query_reseek_func_t reseek, STsdbReadSnap **ppSnap); void tsdbUntakeReadSnap(STsdbReader *pReader, STsdbReadSnap *pSnap, bool proactive); + +int32_t tsdbTakeReadSnap2(STsdbReader *pReader, _query_reseek_func_t reseek, STsdbReadSnap **ppSnap); +void tsdbUntakeReadSnap2(STsdbReader *pReader, STsdbReadSnap *pSnap, bool proactive); // tsdbMerge.c ============================================================================================== -int32_t tsdbMerge(STsdb *pTsdb); +int32_t tsdbMerge(void *arg); // tsdbDiskData ============================================================================================== int32_t tDiskDataBuilderCreate(SDiskDataBuilder **ppBuilder); @@ -362,19 +368,20 @@ typedef struct { } SCacheFlushState; struct STsdb { - char *path; - SVnode *pVnode; - STsdbKeepCfg keepCfg; - TdThreadRwlock rwLock; - SMemTable *mem; - 
SMemTable *imem; - STsdbFS fs; - SLRUCache *lruCache; - SCacheFlushState flushState; - TdThreadMutex lruMutex; - SLRUCache *biCache; - TdThreadMutex biMutex; - SRocksCache rCache; + char *path; + SVnode *pVnode; + STsdbKeepCfg keepCfg; + TdThreadRwlock rwLock; + SMemTable *mem; + SMemTable *imem; + STsdbFS fs; // old + SLRUCache *lruCache; + SCacheFlushState flushState; + TdThreadMutex lruMutex; + SLRUCache *biCache; + TdThreadMutex biMutex; + struct STFileSystem *pFS; // new + SRocksCache rCache; }; struct TSDBKEY { @@ -410,6 +417,7 @@ struct STbData { SDelData *pTail; SMemSkipList sl; STbData *next; + SRBTreeNode rbtn[1]; }; struct SMemTable { @@ -423,11 +431,10 @@ struct SMemTable { TSKEY maxKey; int64_t nRow; int64_t nDel; - struct { - int32_t nTbData; - int32_t nBucket; - STbData **aBucket; - }; + int32_t nTbData; + int32_t nBucket; + STbData **aBucket; + SRBTree tbDataTree[1]; }; struct TSDBROW { @@ -500,7 +507,7 @@ struct SDataBlk { int32_t nRow; int8_t hasDup; int8_t nSubBlock; - SBlockInfo aSubBlock[TSDB_MAX_SUBBLOCKS]; + SBlockInfo aSubBlock[1]; SSmaInfo smaInfo; }; @@ -652,12 +659,19 @@ struct SDelFWriter { uint8_t *aBuf[1]; }; +#include "tarray2.h" +//#include "tsdbFS2.h" +// struct STFileSet; +typedef struct STFileSet STFileSet; +typedef TARRAY2(STFileSet *) TFileSetArray; + struct STsdbReadSnap { - SMemTable *pMem; - SQueryNode *pNode; - SMemTable *pIMem; - SQueryNode *pINode; - STsdbFS fs; + SMemTable *pMem; + SQueryNode *pNode; + SMemTable *pIMem; + SQueryNode *pINode; + TFileSetArray *pfSetArray; + STsdbFS fs; }; struct SDataFWriter { @@ -696,6 +710,7 @@ typedef struct { typedef struct SSttBlockLoadInfo { SBlockData blockData[2]; + void *pSttStatisBlkArray; SArray *aSttBlk; int32_t blockIndex[2]; // to denote the loaded block in the corresponding position. 
int32_t currentLoadBlockIndex; @@ -704,10 +719,9 @@ typedef struct SSttBlockLoadInfo { STSchema *pSchema; int16_t *colIds; int32_t numOfCols; - bool checkRemainingRow; + bool checkRemainingRow; // todo: no assign value? bool isLast; bool sttBlockLoaded; - int32_t numOfStt; // keep the last access position, this position may be used to reduce the binary times for // starting last block data for a new table @@ -766,61 +780,66 @@ struct SDiskDataBuilder { }; typedef struct SLDataIter { - SRBTreeNode node; - SSttBlk *pSttBlk; - SDataFReader *pReader; - int32_t iStt; - int8_t backward; - int32_t iSttBlk; - int32_t iRow; - SRowInfo rInfo; - uint64_t uid; - STimeWindow timeWindow; - SVersionRange verRange; - SSttBlockLoadInfo *pBlockLoadInfo; - bool ignoreEarlierTs; + SRBTreeNode node; + SSttBlk *pSttBlk; + int32_t iStt; // for debug purpose + int8_t backward; + int32_t iSttBlk; + int32_t iRow; + SRowInfo rInfo; + uint64_t uid; + STimeWindow timeWindow; + SVersionRange verRange; + SSttBlockLoadInfo *pBlockLoadInfo; + bool ignoreEarlierTs; + struct SSttFileReader *pReader; } SLDataIter; #define tMergeTreeGetRow(_t) (&((_t)->pIter->rInfo.row)) int32_t tMergeTreeOpen(SMergeTree *pMTree, int8_t backward, SDataFReader *pFReader, uint64_t suid, uint64_t uid, STimeWindow *pTimeWindow, SVersionRange *pVerRange, SSttBlockLoadInfo *pBlockLoadInfo, bool destroyLoadInfo, const char *idStr, bool strictTimeRange, SLDataIter *pLDataIter); -void tMergeTreeAddIter(SMergeTree *pMTree, SLDataIter *pIter); -bool tMergeTreeNext(SMergeTree *pMTree); -bool tMergeTreeIgnoreEarlierTs(SMergeTree *pMTree); -void tMergeTreeClose(SMergeTree *pMTree); + +struct SSttFileReader; +typedef int32_t (*_load_tomb_fn)(STsdbReader *pReader, struct SSttFileReader *pSttFileReader, + SSttBlockLoadInfo *pLoadInfo); + +typedef struct { + int8_t backward; + STsdb *pTsdb; + uint64_t suid; + uint64_t uid; + STimeWindow timewindow; + SVersionRange verRange; + bool strictTimeRange; + SArray *pSttFileBlockIterArray; + 
void *pCurrentFileset; + STSchema *pSchema; + int16_t *pCols; + int32_t numOfCols; + _load_tomb_fn loadTombFn; + void *pReader; + void *idstr; +} SMergeTreeConf; +int32_t tMergeTreeOpen2(SMergeTree *pMTree, SMergeTreeConf *pConf); + +void tMergeTreeAddIter(SMergeTree *pMTree, SLDataIter *pIter); +bool tMergeTreeNext(SMergeTree *pMTree); +bool tMergeTreeIgnoreEarlierTs(SMergeTree *pMTree); +void tMergeTreeClose(SMergeTree *pMTree); SSttBlockLoadInfo *tCreateLastBlockLoadInfo(STSchema *pSchema, int16_t *colList, int32_t numOfCols, int32_t numOfStt); +SSttBlockLoadInfo *tCreateOneLastBlockLoadInfo(STSchema *pSchema, int16_t *colList, int32_t numOfCols); void resetLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo); void getLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo, int64_t *blocks, double *el); void *destroyLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo); +void *destroySttBlockReader(SArray *pLDataIterArray, int64_t *blocks, double *el); // tsdbCache ============================================================================================== -typedef struct SCacheRowsReader { - STsdb *pTsdb; - SVersionRange verRange; - TdThreadMutex readerMutex; - SVnode *pVnode; - STSchema *pSchema; - STSchema *pCurrSchema; - uint64_t uid; - uint64_t suid; - char **transferBuf; // todo remove it soon - int32_t numOfCols; - SArray *pCidList; - int32_t *pSlotIds; - int32_t type; - int32_t tableIndex; // currently returned result tables - STableKeyInfo *pTableList; // table id list - int32_t numOfTables; - SSttBlockLoadInfo *pLoadInfo; - SLDataIter *pDataIter; - STsdbReadSnap *pReadSnap; - SDataFReader *pDataFReader; - SDataFReader *pDataFReaderLast; - const char *idstr; - int64_t lastTs; -} SCacheRowsReader; +typedef enum { + READ_MODE_COUNT_ONLY = 0x1, + READ_MODE_ALL, +} EReadMode; typedef struct { TSKEY ts; @@ -831,14 +850,10 @@ typedef struct { int32_t tsdbOpenCache(STsdb *pTsdb); void tsdbCloseCache(STsdb *pTsdb); int32_t tsdbCacheUpdate(STsdb *pTsdb, tb_uid_t suid, tb_uid_t 
uid, TSDBROW *row); -int32_t tsdbCacheGetBatch(STsdb *pTsdb, tb_uid_t uid, SArray *pLastArray, SCacheRowsReader *pr, int8_t ltype); -int32_t tsdbCacheGet(STsdb *pTsdb, tb_uid_t uid, SArray *pLastArray, SCacheRowsReader *pr, int8_t ltype); int32_t tsdbCacheDel(STsdb *pTsdb, tb_uid_t suid, tb_uid_t uid, TSKEY sKey, TSKEY eKey); int32_t tsdbCacheInsertLast(SLRUCache *pCache, tb_uid_t uid, TSDBROW *row, STsdb *pTsdb); int32_t tsdbCacheInsertLastrow(SLRUCache *pCache, STsdb *pTsdb, tb_uid_t uid, TSDBROW *row, bool dup); -int32_t tsdbCacheGetLastH(SLRUCache *pCache, tb_uid_t uid, SCacheRowsReader *pr, LRUHandle **h); -int32_t tsdbCacheGetLastrowH(SLRUCache *pCache, tb_uid_t uid, SCacheRowsReader *pr, LRUHandle **h); int32_t tsdbCacheRelease(SLRUCache *pCache, LRUHandle *h); int32_t tsdbCacheGetBlockIdx(SLRUCache *pCache, SDataFReader *pFileReader, LRUHandle **handle); @@ -848,8 +863,6 @@ int32_t tsdbCacheDeleteLastrow(SLRUCache *pCache, tb_uid_t uid, TSKEY eKey); int32_t tsdbCacheDeleteLast(SLRUCache *pCache, tb_uid_t uid, TSKEY eKey); int32_t tsdbCacheDelete(SLRUCache *pCache, tb_uid_t uid, TSKEY eKey); -// int32_t tsdbCacheLastArray2Row(SArray *pLastArray, STSRow **ppRow, STSchema *pSchema); - // ========== inline functions ========== static FORCE_INLINE int32_t tsdbKeyCmprFn(const void *p1, const void *p2) { TSDBKEY *pKey1 = (TSDBKEY *)p1; diff --git a/source/dnode/vnode/src/inc/vnd.h b/source/dnode/vnode/src/inc/vnd.h index a75d5d9307afc4a11f22b9fa3bef8d53281516bf..55b62dfe48fe49fe7ea78d8323c5125619dfdbdc 100644 --- a/source/dnode/vnode/src/inc/vnd.h +++ b/source/dnode/vnode/src/inc/vnd.h @@ -49,7 +49,8 @@ int32_t vnodeEncodeConfig(const void* pObj, SJson* pJson); int32_t vnodeDecodeConfig(const SJson* pJson, void* pObj); // vnodeModule.c -int32_t vnodeScheduleTask(int32_t (*execute)(void*), void* arg); +int vnodeScheduleTask(int (*execute)(void*), void* arg); +int vnodeScheduleTaskEx(int tpid, int (*execute)(void*), void* arg); // vnodeBufPool.c typedef struct 
SVBufPoolNode SVBufPoolNode; diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index f5a610efc7ccb662bac72f907e701708ecd77e18..cd7704940b3a017dae7d12e15647fc61ca8fd542 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -180,8 +180,8 @@ SArray* metaGetSmaTbUids(SMeta* pMeta); void* metaGetIdx(SMeta* pMeta); void* metaGetIvtIdx(SMeta* pMeta); -int64_t metaGetTbNum(SMeta *pMeta); -void metaReaderDoInit(SMetaReader *pReader, SMeta *pMeta, int32_t flags); +int64_t metaGetTbNum(SMeta* pMeta); +void metaReaderDoInit(SMetaReader* pReader, SMeta* pMeta, int32_t flags); int32_t metaCreateTSma(SMeta* pMeta, int64_t version, SSmaCfg* pCfg); int32_t metaDropTSma(SMeta* pMeta, int64_t indexUid); @@ -198,12 +198,12 @@ int32_t metaGetInfo(SMeta* pMeta, int64_t uid, SMetaInfo* pInfo, SMetaReader* pR int tsdbOpen(SVnode* pVnode, STsdb** ppTsdb, const char* dir, STsdbKeepCfg* pKeepCfg, int8_t rollback); int tsdbClose(STsdb** pTsdb); int32_t tsdbBegin(STsdb* pTsdb); -int32_t tsdbPrepareCommit(STsdb* pTsdb); -int32_t tsdbCommit(STsdb* pTsdb, SCommitInfo* pInfo); +// int32_t tsdbPrepareCommit(STsdb* pTsdb); +// int32_t tsdbCommit(STsdb* pTsdb, SCommitInfo* pInfo); int32_t tsdbCacheCommit(STsdb* pTsdb); int32_t tsdbCompact(STsdb* pTsdb, SCompactInfo* pInfo); -int32_t tsdbFinishCommit(STsdb* pTsdb); -int32_t tsdbRollbackCommit(STsdb* pTsdb); +// int32_t tsdbFinishCommit(STsdb* pTsdb); +// int32_t tsdbRollbackCommit(STsdb* pTsdb); int tsdbScanAndConvertSubmitMsg(STsdb* pTsdb, SSubmitReq2* pMsg); int tsdbInsertData(STsdb* pTsdb, int64_t version, SSubmitReq2* pMsg, SSubmitRsp2* pRsp); int32_t tsdbInsertTableData(STsdb* pTsdb, int64_t version, SSubmitTbData* pSubmitTbData, int32_t* affectedRows); @@ -231,10 +231,11 @@ int32_t tqProcessDelCheckInfoReq(STQ* pTq, int64_t version, char* msg, int32_t m int32_t tqProcessSubscribeReq(STQ* pTq, int64_t version, char* msg, int32_t msgLen); int32_t 
tqProcessDeleteSubReq(STQ* pTq, int64_t version, char* msg, int32_t msgLen); int32_t tqProcessOffsetCommitReq(STQ* pTq, int64_t version, char* msg, int32_t msgLen); -int32_t tqProcessSeekReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen); +int32_t tqProcessSeekReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessPollPush(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessVgWalInfoReq(STQ* pTq, SRpcMsg* pMsg); +int32_t tqProcessVgCommittedInfoReq(STQ* pTq, SRpcMsg* pMsg); // tq-stream int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t version, char* msg, int32_t msgLen); @@ -250,8 +251,8 @@ int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskRetrieveRsp(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskTransferStateReq(STQ* pTq, SRpcMsg* pMsg); -int32_t tqProcessStreamTaskScanHistoryFinishReq(STQ* pTq, SRpcMsg* pMsg); -int32_t tqProcessTaskRecoverFinishRsp(STQ* pTq, SRpcMsg* pMsg); +int32_t tqProcessTaskScanHistoryFinishReq(STQ* pTq, SRpcMsg* pMsg); +int32_t tqProcessTaskScanHistoryFinishRsp(STQ* pTq, SRpcMsg* pMsg); int32_t tqCheckLogInWal(STQ* pTq, int64_t version); // sma diff --git a/source/dnode/vnode/src/meta/metaTable.c b/source/dnode/vnode/src/meta/metaTable.c index ad96004098af64da70b13061d44d6467bb5ffecd..632e6dd872be9b7381309653404b88f4dbf5152d 100644 --- a/source/dnode/vnode/src/meta/metaTable.c +++ b/source/dnode/vnode/src/meta/metaTable.c @@ -455,7 +455,7 @@ int metaAddIndexToSTable(SMeta *pMeta, int64_t version, SVCreateStbReq *pReq) { } } - if (diffIdx == -1 && diffIdx == 0) { + if (diffIdx == -1 || diffIdx == 0) { goto _err; } @@ -1662,10 +1662,11 @@ static int metaAddTagIndex(SMeta *pMeta, int64_t version, SVAlterTbReq *pAlterTb if (ret < 0) { terrno = TSDB_CODE_TDB_TABLE_NOT_EXIST; return -1; + } else { + uid = *(tb_uid_t *)pVal; + tdbFree(pVal); + pVal = NULL; } - uid = *(tb_uid_t *)pVal; - tdbFree(pVal); - pVal = NULL; 
if (tdbTbGet(pMeta->pUidIdx, &uid, sizeof(tb_uid_t), &pVal, &nVal) == -1) { ret = -1; @@ -1744,12 +1745,16 @@ static int metaAddTagIndex(SMeta *pMeta, int64_t version, SVAlterTbReq *pAlterTb nTagData = tDataTypes[pCol->type].bytes; } if (metaCreateTagIdxKey(suid, pCol->colId, pTagData, nTagData, pCol->type, uid, &pTagIdxKey, &nTagIdxKey) < 0) { + tdbFree(pKey); + tdbFree(pVal); metaDestroyTagIdxKey(pTagIdxKey); + tdbTbcClose(pCtbIdxc); goto _err; } tdbTbUpsert(pMeta->pTagIdx, pTagIdxKey, nTagIdxKey, NULL, 0, pMeta->txn); metaDestroyTagIdxKey(pTagIdxKey); } + tdbTbcClose(pCtbIdxc); return 0; _err: diff --git a/source/dnode/vnode/src/meta/metaTtl.c b/source/dnode/vnode/src/meta/metaTtl.c index 045a759fad23c1c8cb1986cafb0f8a1b6062de32..45f697258c4e8e599f1d0a2179ce10db03b6f3f5 100644 --- a/source/dnode/vnode/src/meta/metaTtl.c +++ b/source/dnode/vnode/src/meta/metaTtl.c @@ -379,7 +379,7 @@ _out: int ttlMgrFlush(STtlManger *pTtlMgr, TXN *pTxn) { ttlMgrWLock(pTtlMgr); - metaInfo("%s, ttl mgr flush start. dirty uids:%d", pTtlMgr->logPrefix, taosHashGetSize(pTtlMgr->pDirtyUids)); + metaDebug("%s, ttl mgr flush start. 
dirty uids:%d", pTtlMgr->logPrefix, taosHashGetSize(pTtlMgr->pDirtyUids)); int ret = -1; @@ -433,7 +433,7 @@ int ttlMgrFlush(STtlManger *pTtlMgr, TXN *pTxn) { _out: ttlMgrULock(pTtlMgr); - metaInfo("%s, ttl mgr flush end.", pTtlMgr->logPrefix); + metaDebug("%s, ttl mgr flush end.", pTtlMgr->logPrefix); return ret; } diff --git a/source/dnode/vnode/src/sma/smaCommit.c b/source/dnode/vnode/src/sma/smaCommit.c index d1c431409182eb322e260dcda8388ec39311903e..c26157f4b7ab260eda5a0244bbc9c6bc5ee812d1 100644 --- a/source/dnode/vnode/src/sma/smaCommit.c +++ b/source/dnode/vnode/src/sma/smaCommit.c @@ -103,15 +103,16 @@ _exit: return code; } -int32_t smaFinishCommit(SSma *pSma) { +extern int32_t tsdbCommitCommit(STsdb *tsdb); +int32_t smaFinishCommit(SSma *pSma) { int32_t code = 0; int32_t lino = 0; SVnode *pVnode = pSma->pVnode; - if (VND_RSMA1(pVnode) && (code = tsdbFinishCommit(VND_RSMA1(pVnode))) < 0) { + if (VND_RSMA1(pVnode) && (code = tsdbCommitCommit(VND_RSMA1(pVnode))) < 0) { TSDB_CHECK_CODE(code, lino, _exit); } - if (VND_RSMA2(pVnode) && (code = tsdbFinishCommit(VND_RSMA2(pVnode))) < 0) { + if (VND_RSMA2(pVnode) && (code = tsdbCommitCommit(VND_RSMA2(pVnode))) < 0) { TSDB_CHECK_CODE(code, lino, _exit); } _exit: @@ -130,6 +131,7 @@ _exit: * @param isCommit * @return int32_t */ +extern int32_t tsdbPreCommit(STsdb *tsdb); static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma, bool isCommit) { int32_t code = 0; int32_t lino = 0; @@ -186,11 +188,11 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma, bool isCommit) { // all rsma results are written completely STsdb *pTsdb = NULL; if ((pTsdb = VND_RSMA1(pSma->pVnode))) { - code = tsdbPrepareCommit(pTsdb); + code = tsdbPreCommit(pTsdb); TSDB_CHECK_CODE(code, lino, _exit); } if ((pTsdb = VND_RSMA2(pSma->pVnode))) { - code = tsdbPrepareCommit(pTsdb); + code = tsdbPreCommit(pTsdb); TSDB_CHECK_CODE(code, lino, _exit); } @@ -207,6 +209,7 @@ _exit: * @param pSma * @return int32_t */ +extern int32_t 
tsdbCommitBegin(STsdb *tsdb, SCommitInfo *info); static int32_t tdProcessRSmaAsyncCommitImpl(SSma *pSma, SCommitInfo *pInfo) { int32_t code = 0; int32_t lino = 0; @@ -217,10 +220,10 @@ static int32_t tdProcessRSmaAsyncCommitImpl(SSma *pSma, SCommitInfo *pInfo) { goto _exit; } - code = tsdbCommit(VND_RSMA1(pVnode), pInfo); + code = tsdbCommitBegin(VND_RSMA1(pVnode), pInfo); TSDB_CHECK_CODE(code, lino, _exit); - code = tsdbCommit(VND_RSMA2(pVnode), pInfo); + code = tsdbCommitBegin(VND_RSMA2(pVnode), pInfo); TSDB_CHECK_CODE(code, lino, _exit); _exit: diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 490bcd1238e6e962477d949d7975211fc452a5f7..9fd493844877d343e07d9f251b6b0882f25f89e0 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -264,7 +264,7 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat return TSDB_CODE_FAILED; } - SReadHandle handle = { .vnode = pVnode, .initTqReader = 1, .pStateBackend = pStreamState }; + SReadHandle handle = {.vnode = pVnode, .initTqReader = 1, .pStateBackend = pStreamState}; initStorageAPI(&handle.api); pRSmaInfo->taskInfo[idx] = qCreateStreamExecTaskInfo(param->qmsg[idx], &handle, TD_VID(pVnode)); @@ -572,8 +572,8 @@ int32_t smaDoRetention(SSma *pSma, int64_t now) { for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { if (pSma->pRSmaTsdb[i]) { - code = tsdbDoRetention(pSma->pRSmaTsdb[i], now); - if (code) goto _end; + // code = tsdbDoRetention(pSma->pRSmaTsdb[i], now); + // if (code) goto _end; } } @@ -612,7 +612,6 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma blockDebugShowDataBlocks(pResList, flag); #endif for (int32_t i = 0; i < taosArrayGetSize(pResList); ++i) { - output = taosArrayGetP(pResList, i); smaDebug("vgId:%d, result block, uid:%" PRIu64 ", groupid:%" PRIu64 ", rows:%" PRIi64, SMA_VID(pSma), output->info.id.uid, output->info.id.groupId, output->info.rows); @@ 
-1114,8 +1113,8 @@ static void tdRSmaFetchTrigger(void *param, void *tmrId) { } if (!(pStat = (SRSmaStat *)tdAcquireSmaRef(smaMgmt.rsetId, pRSmaRef->refId))) { - smaWarn("rsma fetch task not start since rsma stat already destroyed, rsetId:%d refId:%" PRIi64 ")", - smaMgmt.rsetId, pRSmaRef->refId); // pRSmaRef freed in taosHashRemove + smaWarn("rsma fetch task not start since rsma stat already destroyed, rsetId:%d refId:%" PRIi64 ")", smaMgmt.rsetId, + pRSmaRef->refId); // pRSmaRef freed in taosHashRemove taosHashRemove(smaMgmt.refHash, ¶m, POINTER_BYTES); return; } diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index bbdd98e35695027f41a4702101d438aaabac32be..4d433042ad9be3f741cecd9edb994aa870e5c7e4 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -83,9 +83,9 @@ void tqDestroyTqHandle(void* data) { } } -static bool tqOffsetLessOrEqual(const STqOffset* pLeft, const STqOffset* pRight) { +static bool tqOffsetEqual(const STqOffset* pLeft, const STqOffset* pRight) { return pLeft->val.type == TMQ_OFFSET__LOG && pRight->val.type == TMQ_OFFSET__LOG && - pLeft->val.version <= pRight->val.version; + pLeft->val.version == pRight->val.version; } STQ* tqOpen(const char* path, SVnode* pVnode) { @@ -146,6 +146,20 @@ void tqClose(STQ* pTq) { return; } + void* pIter = taosHashIterate(pTq->pPushMgr, NULL); + while (pIter) { + STqHandle* pHandle = *(STqHandle**)pIter; + int32_t vgId = TD_VID(pTq->pVnode); + + if(pHandle->msg != NULL) { + tqPushEmptyDataRsp(pHandle, vgId); + rpcFreeCont(pHandle->msg->pCont); + taosMemoryFree(pHandle->msg); + pHandle->msg = NULL; + } + pIter = taosHashIterate(pTq->pPushMgr, pIter); + } + tqOffsetClose(pTq->pOffsetStore); taosHashCleanup(pTq->pHandle); taosHashCleanup(pTq->pPushMgr); @@ -278,6 +292,10 @@ int32_t tqPushEmptyDataRsp(STqHandle* pHandle, int32_t vgId) { tqInitDataRsp(&dataRsp, &req); dataRsp.blockNum = 0; dataRsp.rspOffset = dataRsp.reqOffset; + char buf[TSDB_OFFSET_LEN] = {0}; 
+ tFormatOffset(buf, TSDB_OFFSET_LEN, &dataRsp.reqOffset); + tqInfo("tqPushEmptyDataRsp to consumer:0x%"PRIx64 " vgId:%d, offset:%s, reqId:0x%" PRIx64, req.consumerId, vgId, buf, req.reqId); + tqSendDataRsp(pHandle, pHandle->msg, &req, &dataRsp, TMQ_MSG_TYPE__POLL_DATA_RSP, vgId); tDeleteMqDataRsp(&dataRsp); return 0; @@ -341,17 +359,14 @@ int32_t tqProcessOffsetCommitReq(STQ* pTq, int64_t sversion, char* msg, int32_t } else if (pOffset->val.type == TMQ_OFFSET__LOG) { tqDebug("receive offset commit msg to %s on vgId:%d, offset(type:log) version:%" PRId64, pOffset->subKey, vgId, pOffset->val.version); - if (pOffset->val.version + 1 == sversion) { - pOffset->val.version += 1; - } } else { tqError("invalid commit offset type:%d", pOffset->val.type); return -1; } STqOffset* pSavedOffset = tqOffsetRead(pTq->pOffsetStore, pOffset->subKey); - if (pSavedOffset != NULL && tqOffsetLessOrEqual(pOffset, pSavedOffset)) { - tqDebug("not update the offset, vgId:%d sub:%s since committed:%" PRId64 " less than/equal to existed:%" PRId64, + if (pSavedOffset != NULL && tqOffsetEqual(pOffset, pSavedOffset)) { + tqInfo("not update the offset, vgId:%d sub:%s since committed:%" PRId64 " less than/equal to existed:%" PRId64, vgId, pOffset->subKey, pOffset->val.version, pSavedOffset->val.version); return 0; // no need to update the offset value } @@ -364,86 +379,125 @@ int32_t tqProcessOffsetCommitReq(STQ* pTq, int64_t sversion, char* msg, int32_t return 0; } -int32_t tqProcessSeekReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { - SMqVgOffset vgOffset = {0}; +int32_t tqProcessSeekReq(STQ* pTq, SRpcMsg* pMsg) { + SMqSeekReq req = {0}; int32_t vgId = TD_VID(pTq->pVnode); + SRpcMsg rsp = {.info = pMsg->info}; + int code = 0; - SDecoder decoder; - tDecoderInit(&decoder, (uint8_t*)msg, msgLen); - if (tDecodeMqVgOffset(&decoder, &vgOffset) < 0) { - tqError("vgId:%d failed to decode seek msg", vgId); - return -1; + if (tDeserializeSMqSeekReq(pMsg->pCont, pMsg->contLen, &req) < 0) { + 
code = TSDB_CODE_OUT_OF_MEMORY; + goto end; } - tDecoderClear(&decoder); - - tqDebug("topic:%s, vgId:%d process offset seek by consumer:0x%" PRIx64 ", req offset:%" PRId64, - vgOffset.offset.subKey, vgId, vgOffset.consumerId, vgOffset.offset.val.version); + tqDebug("tmq seek: consumer:0x%" PRIx64 " vgId:%d, subkey %s", req.consumerId, vgId, req.subKey); - STqOffset* pOffset = &vgOffset.offset; - if (pOffset->val.type != TMQ_OFFSET__LOG) { - tqError("vgId:%d, subKey:%s invalid seek offset type:%d", vgId, pOffset->subKey, pOffset->val.type); - return -1; - } - - STqHandle* pHandle = taosHashGet(pTq->pHandle, pOffset->subKey, strlen(pOffset->subKey)); + STqHandle* pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey)); if (pHandle == NULL) { - tqError("tmq seek: consumer:0x%" PRIx64 " vgId:%d subkey %s not found", vgOffset.consumerId, vgId, pOffset->subKey); - terrno = TSDB_CODE_INVALID_MSG; - return -1; + tqWarn("tmq seek: consumer:0x%" PRIx64 " vgId:%d subkey %s not found", req.consumerId, vgId, req.subKey); + code = 0; + goto end; } // 2. check consumer-vg assignment status taosRLockLatch(&pTq->lock); - if (pHandle->consumerId != vgOffset.consumerId) { - tqDebug("ERROR tmq seek: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64, - vgOffset.consumerId, vgId, pOffset->subKey, pHandle->consumerId); - terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH; + if (pHandle->consumerId != req.consumerId) { + tqError("ERROR tmq seek: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64, + req.consumerId, vgId, req.subKey, pHandle->consumerId); taosRUnLockLatch(&pTq->lock); - return -1; + code = TSDB_CODE_TMQ_CONSUMER_MISMATCH; + goto end; } - taosRUnLockLatch(&pTq->lock); - // 3. 
check the offset info - STqOffset* pSavedOffset = tqOffsetRead(pTq->pOffsetStore, pOffset->subKey); - if (pSavedOffset != NULL) { - if (pSavedOffset->val.type != TMQ_OFFSET__LOG) { - tqError("invalid saved offset type, vgId:%d sub:%s", vgId, pOffset->subKey); - return 0; // no need to update the offset value - } - - if (pSavedOffset->val.version == pOffset->val.version) { - tqDebug("vgId:%d subKey:%s no need to seek to %" PRId64 " prev offset:%" PRId64, vgId, pOffset->subKey, - pOffset->val.version, pSavedOffset->val.version); - return 0; - } - } - - int64_t sver = 0, ever = 0; - walReaderValidVersionRange(pHandle->execHandle.pTqReader->pWalReader, &sver, &ever); - if (pOffset->val.version < sver) { - pOffset->val.version = sver; - } else if (pOffset->val.version > ever) { - pOffset->val.version = ever; - } - - // save the new offset value - if (pSavedOffset != NULL) { - tqDebug("vgId:%d sub:%s seek to:%" PRId64 " prev offset:%" PRId64, vgId, pOffset->subKey, pOffset->val.version, - pSavedOffset->val.version); - } else { - tqDebug("vgId:%d sub:%s seek to:%" PRId64 " not saved yet", vgId, pOffset->subKey, pOffset->val.version); - } - - if (tqOffsetWrite(pTq->pOffsetStore, pOffset) < 0) { - tqError("failed to save offset, vgId:%d sub:%s seek to %" PRId64, vgId, pOffset->subKey, pOffset->val.version); - return -1; - } - - tqDebug("topic:%s, vgId:%d consumer:0x%" PRIx64 " offset is update to:%" PRId64, vgOffset.offset.subKey, vgId, - vgOffset.consumerId, vgOffset.offset.val.version); + //if consumer register to push manager, push empty to consumer to change vg status from TMQ_VG_STATUS__WAIT to TMQ_VG_STATUS__IDLE, + //otherwise poll data failed after seek. 
+ tqUnregisterPushHandle(pTq, pHandle); + taosRUnLockLatch(&pTq->lock); +end: + rsp.code = code; + tmsgSendRsp(&rsp); return 0; + +// SMqVgOffset vgOffset = {0}; +// int32_t vgId = TD_VID(pTq->pVnode); +// +// SDecoder decoder; +// tDecoderInit(&decoder, (uint8_t*)msg, msgLen); +// if (tDecodeMqVgOffset(&decoder, &vgOffset) < 0) { +// tqError("vgId:%d failed to decode seek msg", vgId); +// return -1; +// } +// +// tDecoderClear(&decoder); +// +// tqDebug("topic:%s, vgId:%d process offset seek by consumer:0x%" PRIx64 ", req offset:%" PRId64, +// vgOffset.offset.subKey, vgId, vgOffset.consumerId, vgOffset.offset.val.version); +// +// STqOffset* pOffset = &vgOffset.offset; +// if (pOffset->val.type != TMQ_OFFSET__LOG) { +// tqError("vgId:%d, subKey:%s invalid seek offset type:%d", vgId, pOffset->subKey, pOffset->val.type); +// return -1; +// } +// +// STqHandle* pHandle = taosHashGet(pTq->pHandle, pOffset->subKey, strlen(pOffset->subKey)); +// if (pHandle == NULL) { +// tqError("tmq seek: consumer:0x%" PRIx64 " vgId:%d subkey %s not found", vgOffset.consumerId, vgId, pOffset->subKey); +// terrno = TSDB_CODE_INVALID_MSG; +// return -1; +// } +// +// // 2. check consumer-vg assignment status +// taosRLockLatch(&pTq->lock); +// if (pHandle->consumerId != vgOffset.consumerId) { +// tqDebug("ERROR tmq seek: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64, +// vgOffset.consumerId, vgId, pOffset->subKey, pHandle->consumerId); +// terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH; +// taosRUnLockLatch(&pTq->lock); +// return -1; +// } +// taosRUnLockLatch(&pTq->lock); +// +// // 3. 
check the offset info +// STqOffset* pSavedOffset = tqOffsetRead(pTq->pOffsetStore, pOffset->subKey); +// if (pSavedOffset != NULL) { +// if (pSavedOffset->val.type != TMQ_OFFSET__LOG) { +// tqError("invalid saved offset type, vgId:%d sub:%s", vgId, pOffset->subKey); +// return 0; // no need to update the offset value +// } +// +// if (pSavedOffset->val.version == pOffset->val.version) { +// tqDebug("vgId:%d subKey:%s no need to seek to %" PRId64 " prev offset:%" PRId64, vgId, pOffset->subKey, +// pOffset->val.version, pSavedOffset->val.version); +// return 0; +// } +// } +// +// int64_t sver = 0, ever = 0; +// walReaderValidVersionRange(pHandle->execHandle.pTqReader->pWalReader, &sver, &ever); +// if (pOffset->val.version < sver) { +// pOffset->val.version = sver; +// } else if (pOffset->val.version > ever) { +// pOffset->val.version = ever; +// } +// +// // save the new offset value +// if (pSavedOffset != NULL) { +// tqDebug("vgId:%d sub:%s seek to:%" PRId64 " prev offset:%" PRId64, vgId, pOffset->subKey, pOffset->val.version, +// pSavedOffset->val.version); +// } else { +// tqDebug("vgId:%d sub:%s seek to:%" PRId64 " not saved yet", vgId, pOffset->subKey, pOffset->val.version); +// } +// +// if (tqOffsetWrite(pTq->pOffsetStore, pOffset) < 0) { +// tqError("failed to save offset, vgId:%d sub:%s seek to %" PRId64, vgId, pOffset->subKey, pOffset->val.version); +// return -1; +// } +// +// tqDebug("topic:%s, vgId:%d consumer:0x%" PRIx64 " offset is update to:%" PRId64, vgOffset.offset.subKey, vgId, +// vgOffset.consumerId, vgOffset.offset.val.version); +// +// return 0; } int32_t tqCheckColModifiable(STQ* pTq, int64_t tbUid, int32_t colId) { @@ -480,10 +534,11 @@ int32_t tqProcessPollPush(STQ* pTq, SRpcMsg* pMsg) { while (pIter) { STqHandle* pHandle = *(STqHandle**)pIter; - tqDebug("vgId:%d start set submit for pHandle:%p, consumer:0x%" PRIx64, vgId, pHandle, pHandle->consumerId); + tqInfo("vgId:%d start set submit for pHandle:%p, consumer:0x%" PRIx64, vgId, 
pHandle, pHandle->consumerId); if (ASSERT(pHandle->msg != NULL)) { tqError("pHandle->msg should not be null"); + taosHashCancelIterate(pTq->pPushMgr, pIter); break; }else{ SRpcMsg msg = {.msgType = TDMT_VND_TMQ_CONSUME, .pCont = pHandle->msg->pCont, .contLen = pHandle->msg->contLen, .info = pHandle->msg->info}; @@ -521,10 +576,18 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { // 1. find handle pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey)); if (pHandle == NULL) { - tqError("tmq poll: consumer:0x%" PRIx64 " vgId:%d subkey %s not found", consumerId, vgId, req.subKey); - terrno = TSDB_CODE_INVALID_MSG; - taosWUnLockLatch(&pTq->lock); - return -1; + do{ + if (tqMetaGetHandle(pTq, req.subKey) == 0){ + pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey)); + if(pHandle != NULL){ + break; + } + } + tqError("tmq poll: consumer:0x%" PRIx64 " vgId:%d subkey %s not found", consumerId, vgId, req.subKey); + terrno = TSDB_CODE_INVALID_MSG; + taosWUnLockLatch(&pTq->lock); + return -1; + }while(0); } // 2. 
check re-balance status @@ -574,6 +637,49 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { return code; } +int32_t tqProcessVgCommittedInfoReq(STQ* pTq, SRpcMsg* pMsg) { + void* data = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + int32_t len = pMsg->contLen - sizeof(SMsgHead); + + SMqVgOffset vgOffset = {0}; + + SDecoder decoder; + tDecoderInit(&decoder, (uint8_t*)data, len); + if (tDecodeMqVgOffset(&decoder, &vgOffset) < 0) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + tDecoderClear(&decoder); + + STqOffset* pOffset = &vgOffset.offset; + STqOffset* pSavedOffset = tqOffsetRead(pTq->pOffsetStore, pOffset->subKey); + if (pSavedOffset == NULL) { + return TSDB_CODE_TMQ_NO_COMMITTED; + } + vgOffset.offset = *pSavedOffset; + + int32_t code = 0; + tEncodeSize(tEncodeMqVgOffset, &vgOffset, len, code); + if (code < 0) { + return TSDB_CODE_INVALID_PARA; + } + + void* buf = rpcMallocCont(len); + if (buf == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + SEncoder encoder; + tEncoderInit(&encoder, buf, len); + tEncodeMqVgOffset(&encoder, &vgOffset); + tEncoderClear(&encoder); + + SRpcMsg rsp = {.info = pMsg->info, .pCont = buf, .contLen = len, .code = 0}; + + tmsgSendRsp(&rsp); + + return 0; +} + int32_t tqProcessVgWalInfoReq(STQ* pTq, SRpcMsg* pMsg) { SMqPollReq req = {0}; if (tDeserializeSMqPollReq(pMsg->pCont, pMsg->contLen, &req) < 0) { @@ -659,7 +765,7 @@ int32_t tqProcessDeleteSubReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg SMqVDeleteReq* pReq = (SMqVDeleteReq*)msg; int32_t vgId = TD_VID(pTq->pVnode); - tqDebug("vgId:%d, tq process delete sub req %s", vgId, pReq->subKey); + tqInfo("vgId:%d, tq process delete sub req %s", vgId, pReq->subKey); int32_t code = 0; taosWLockLatch(&pTq->lock); @@ -740,7 +846,7 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg return -1; } - tqDebug("vgId:%d, tq process sub req:%s, Id:0x%" PRIx64 " -> Id:0x%" PRIx64, pTq->pVnode->config.vgId, req.subKey, + tqInfo("vgId:%d, tq process sub req:%s, 
Id:0x%" PRIx64 " -> Id:0x%" PRIx64, pTq->pVnode->config.vgId, req.subKey, req.oldConsumerId, req.newConsumerId); STqHandle* pHandle = NULL; @@ -771,30 +877,28 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg taosWLockLatch(&pTq->lock); if (pHandle->consumerId == req.newConsumerId) { // do nothing - tqInfo("vgId:%d consumer:0x%" PRIx64 " remains, no switch occurs, should not reach here", req.vgId, - req.newConsumerId); + tqInfo("vgId:%d no switch consumer:0x%" PRIx64 " remains, because redo wal log", req.vgId, req.newConsumerId); } else { - tqInfo("vgId:%d switch consumer from Id:0x%" PRIx64 " to Id:0x%" PRIx64, req.vgId, pHandle->consumerId, - req.newConsumerId); + tqInfo("vgId:%d switch consumer from Id:0x%" PRIx64 " to Id:0x%" PRIx64, req.vgId, pHandle->consumerId, req.newConsumerId); atomic_store_64(&pHandle->consumerId, req.newConsumerId); + // atomic_add_fetch_32(&pHandle->epoch, 1); + + // kill executing task + // if(tqIsHandleExec(pHandle)) { + // qTaskInfo_t pTaskInfo = pHandle->execHandle.task; + // if (pTaskInfo != NULL) { + // qKillTask(pTaskInfo, TSDB_CODE_SUCCESS); + // } + + // if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { + // qStreamCloseTsdbReader(pTaskInfo); + // } + // } + // remove if it has been register in the push manager, and return one empty block to consumer + tqUnregisterPushHandle(pTq, pHandle); + ret = tqMetaSaveHandle(pTq, req.subKey, pHandle); } - // atomic_add_fetch_32(&pHandle->epoch, 1); - - // kill executing task - // if(tqIsHandleExec(pHandle)) { - // qTaskInfo_t pTaskInfo = pHandle->execHandle.task; - // if (pTaskInfo != NULL) { - // qKillTask(pTaskInfo, TSDB_CODE_SUCCESS); - // } - - // if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { - // qStreamCloseTsdbReader(pTaskInfo); - // } - // } - // remove if it has been register in the push manager, and return one empty block to consumer - tqUnregisterPushHandle(pTq, pHandle); taosWUnLockLatch(&pTq->lock); - ret = 
tqMetaSaveHandle(pTq, req.subKey, pHandle); } end: @@ -811,17 +915,20 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { pTask->refCnt = 1; pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE; pTask->inputQueue = streamQueueOpen(512 << 10); - pTask->outputQueue = streamQueueOpen(512 << 10); + pTask->outputInfo.queue = streamQueueOpen(512 << 10); - if (pTask->inputQueue == NULL || pTask->outputQueue == NULL) { + if (pTask->inputQueue == NULL || pTask->outputInfo.queue == NULL) { + tqError("s-task:%s failed to prepare the input/output queue, initialize task failed", pTask->id.idStr); return -1; } + pTask->tsInfo.init = taosGetTimestampMs(); pTask->inputStatus = TASK_INPUT_STATUS__NORMAL; - pTask->outputStatus = TASK_OUTPUT_STATUS__NORMAL; + pTask->outputInfo.status = TASK_OUTPUT_STATUS__NORMAL; pTask->pMsgCb = &pTq->pVnode->msgCb; pTask->pMeta = pTq->pStreamMeta; + // backup the initial status, and set it to be TASK_STATUS__INIT pTask->chkInfo.version = ver; pTask->chkInfo.currentVer = ver; @@ -880,15 +987,14 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { if (pTask->exec.pExecutor == NULL) { return -1; } - qSetTaskId(pTask->exec.pExecutor, pTask->id.taskId, pTask->id.streamId); } // sink - if (pTask->outputType == TASK_OUTPUT__SMA) { + if (pTask->outputInfo.type == TASK_OUTPUT__SMA) { pTask->smaSink.vnode = pTq->pVnode; pTask->smaSink.smaSink = smaHandleRes; - } else if (pTask->outputType == TASK_OUTPUT__TABLE) { + } else if (pTask->outputInfo.type == TASK_OUTPUT__TABLE) { pTask->tbSink.vnode = pTq->pVnode; pTask->tbSink.tbSinkFunc = tqSinkToTablePipeline; @@ -913,10 +1019,17 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { pTask->exec.pWalReader = walOpenReader(pTq->pVnode->pWal, &cond); } + // reset the task status from unfinished transaction + if (pTask->status.taskStatus == TASK_STATUS__PAUSE) { + tqWarn("s-task:%s reset task status to be normal, kept in meta status: Paused", pTask->id.idStr); + 
pTask->status.taskStatus = TASK_STATUS__NORMAL; + } + + taosThreadMutexInit(&pTask->lock, NULL); streamSetupScheduleTrigger(pTask); tqInfo("vgId:%d expand stream task, s-task:%s, checkpoint ver:%" PRId64 - " child id:%d, level:%d, scan-history:%d, trigger:%" PRId64 " ms", + " child id:%d, level:%d, fill-history:%d, trigger:%" PRId64 " ms, disable pause", vgId, pTask->id.idStr, pTask->chkInfo.version, pTask->info.selfChildId, pTask->info.taskLevel, pTask->info.fillHistory, pTask->triggerParam); @@ -954,37 +1067,16 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) { rsp.status = streamTaskCheckStatus(pTask); streamMetaReleaseTask(pTq->pStreamMeta, pTask); - tqDebug("s-task:%s recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), status:%s, rsp status %d", - pTask->id.idStr, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, - streamGetTaskStatusStr(pTask->status.taskStatus), rsp.status); + const char* pStatus = streamGetTaskStatusStr(pTask->status.taskStatus); + tqDebug("s-task:%s status:%s, recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), ready:%d", + pTask->id.idStr, pStatus, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); } else { rsp.status = 0; tqDebug("tq recv task check(taskId:0x%x not built yet) req(reqId:0x%" PRIx64 ") from task:0x%x (vgId:%d), rsp status %d", taskId, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); } - SEncoder encoder; - int32_t code; - int32_t len; - - tEncodeSize(tEncodeStreamTaskCheckRsp, &rsp, len, code); - if (code < 0) { - tqError("vgId:%d failed to encode task check rsp, task:0x%x", pTq->pStreamMeta->vgId, taskId); - return -1; - } - - void* buf = rpcMallocCont(sizeof(SMsgHead) + len); - ((SMsgHead*)buf)->vgId = htonl(req.upstreamNodeId); - - void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); - tEncoderInit(&encoder, (uint8_t*)abuf, len); - tEncodeStreamTaskCheckRsp(&encoder, &rsp); - tEncoderClear(&encoder); - - SRpcMsg rspMsg = {.code = 0, .pCont = buf, .contLen = 
sizeof(SMsgHead) + len, .info = pMsg->info}; - - tmsgSendRsp(&rspMsg); - return 0; + return streamSendCheckRsp(pTq->pStreamMeta, &req, &rsp, &pMsg->info, taskId); } int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, SRpcMsg* pMsg) { @@ -1051,34 +1143,46 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t ms SStreamMeta* pStreamMeta = pTq->pStreamMeta; // 2.save task, use the newest commit version as the initial start version of stream task. - taosWLockLatch(&pStreamMeta->lock); - code = streamMetaAddDeployedTask(pStreamMeta, sversion, pTask); + int32_t taskId = pTask->id.taskId; + bool added = false; + taosWLockLatch(&pStreamMeta->lock); + code = streamMetaRegisterTask(pStreamMeta, sversion, pTask, &added); int32_t numOfTasks = streamMetaGetNumOfTasks(pStreamMeta); + if (code < 0) { - tqError("vgId:%d failed to add s-task:%s, total:%d", vgId, pTask->id.idStr, numOfTasks); + tqError("vgId:%d failed to add s-task:0x%x, total:%d", vgId, pTask->id.taskId, numOfTasks); + tFreeStreamTask(pTask); taosWUnLockLatch(&pStreamMeta->lock); return -1; } + // not added into meta store + if (!added) { + tqWarn("vgId:%d failed to add s-task:0x%x, already exists in meta store", vgId, taskId); + tFreeStreamTask(pTask); + pTask = NULL; + } + taosWUnLockLatch(&pStreamMeta->lock); - // 3. It's an fill history task, do nothing. wait for the main task to start it - streamPrepareNdoCheckDownstream(pTask); + tqDebug("vgId:%d s-task:0x%x is deployed and add into meta, numOfTasks:%d", vgId, taskId, numOfTasks); - tqDebug("vgId:%d s-task:%s is deployed and add into meta, status:%s, numOfTasks:%d", vgId, pTask->id.idStr, - streamGetTaskStatusStr(pTask->status.taskStatus), numOfTasks); + // 3. It's an fill history task, do nothing. wait for the main task to start it + SStreamTask* p = streamMetaAcquireTask(pStreamMeta, taskId); + if (p != NULL) { // reset the downstreamReady flag. 
+ streamTaskCheckDownstreamTasks(p); + } + streamMetaReleaseTask(pStreamMeta, p); return 0; } int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { - int32_t code = TSDB_CODE_SUCCESS; - char* msg = pMsg->pCont; - + SStreamScanHistoryReq* pReq = (SStreamScanHistoryReq*)pMsg->pCont; SStreamMeta* pMeta = pTq->pStreamMeta; - SStreamScanHistoryReq* pReq = (SStreamScanHistoryReq*)msg; + int32_t code = TSDB_CODE_SUCCESS; SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->taskId); if (pTask == NULL) { tqError("vgId:%d failed to acquire stream task:0x%x during stream recover, task may have been destroyed", @@ -1086,124 +1190,126 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { return -1; } - // do recovery step 1 - const char* pId = pTask->id.idStr; - tqDebug("s-task:%s start history data scan stage(step 1), status:%s", pId, - streamGetTaskStatusStr(pTask->status.taskStatus)); + // do recovery step1 + const char* id = pTask->id.idStr; + const char* pStatus = streamGetTaskStatusStr(pTask->status.taskStatus); + tqDebug("s-task:%s start scan-history stage(step 1), status:%s", id, pStatus); + + if (pTask->tsInfo.step1Start == 0) { + ASSERT(pTask->status.pauseAllowed == false); + pTask->tsInfo.step1Start = taosGetTimestampMs(); + if (pTask->info.fillHistory == 1) { + streamTaskEnablePause(pTask); + } + } else { + tqDebug("s-task:%s resume from paused, start ts:%"PRId64, pTask->id.idStr, pTask->tsInfo.step1Start); + } - int64_t st = taosGetTimestampMs(); - int8_t schedStatus = atomic_val_compare_exchange_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE, - TASK_SCHED_STATUS__WAITING); + // we have to continue retrying to successfully execute the scan history task. 
+ int8_t schedStatus = atomic_val_compare_exchange_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE, + TASK_SCHED_STATUS__WAITING); if (schedStatus != TASK_SCHED_STATUS__INACTIVE) { - ASSERT(0); + tqError( + "s-task:%s failed to start scan-history in first stream time window since already started, unexpected " + "sched-status:%d", + id, schedStatus); return 0; } - if (!streamTaskRecoverScanStep1Finished(pTask)) { - streamSourceScanHistoryData(pTask); + if (pTask->info.fillHistory == 1) { + ASSERT(pTask->status.pauseAllowed == true); } - if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING || streamTaskShouldPause(&pTask->status)) { - tqDebug("s-task:%s is dropped or paused, abort recover in step1", pId); + streamSourceScanHistoryData(pTask); + if (pTask->status.taskStatus == TASK_STATUS__PAUSE) { + double el = (taosGetTimestampMs() - pTask->tsInfo.step1Start) / 1000.0; + tqDebug("s-task:%s is paused in the step1, elapsed time:%.2fs, sched-status:%d", pTask->id.idStr, el, + TASK_SCHED_STATUS__INACTIVE); atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); - streamMetaReleaseTask(pMeta, pTask); return 0; } - double el = (taosGetTimestampMs() - st) / 1000.0; - tqDebug("s-task:%s history data scan stage(step 1) ended, elapsed time:%.2fs", pId, el); + // the following procedure should be executed, no matter status is stop/pause or not + double el = (taosGetTimestampMs() - pTask->tsInfo.step1Start) / 1000.0; + tqDebug("s-task:%s scan-history stage(step 1) ended, elapsed time:%.2fs", id, el); if (pTask->info.fillHistory) { SVersionRange* pRange = NULL; SStreamTask* pStreamTask = NULL; + bool done = false; - if (!pReq->igUntreated && !streamTaskRecoverScanStep1Finished(pTask)) { - // 1. stop the related stream task, get the current scan wal version of stream task, ver. 
- pStreamTask = streamMetaAcquireTask(pMeta, pTask->streamTaskId.taskId); - if (pStreamTask == NULL) { - qError("failed to find s-task:0x%x, it may have been destroyed, drop fill history task:%s", - pTask->streamTaskId.taskId, pTask->id.idStr); - - pTask->status.taskStatus = TASK_STATUS__DROPPING; - tqDebug("s-task:%s scan-history-task set status to be dropping", pId); + // 1. get the related stream task + pStreamTask = streamMetaAcquireTask(pMeta, pTask->streamTaskId.taskId); + if (pStreamTask == NULL) { + // todo delete this task, if the related stream task is dropped + qError("failed to find s-task:0x%x, it may have been destroyed, drop fill-history task:%s", + pTask->streamTaskId.taskId, pTask->id.idStr); - streamMetaSaveTask(pMeta, pTask); - streamMetaReleaseTask(pMeta, pTask); - return -1; - } - - ASSERT(pStreamTask->info.taskLevel == TASK_LEVEL__SOURCE); - - // wait for the stream task get ready for scan history data - while (((pStreamTask->status.downstreamReady == 0) && (pStreamTask->status.taskStatus != TASK_STATUS__STOP)) || - pStreamTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY) { - tqDebug( - "s-task:%s level:%d related stream task:%s(status:%s) not ready for halt, wait for it and recheck in 100ms", - pId, pTask->info.taskLevel, pStreamTask->id.idStr, streamGetTaskStatusStr(pStreamTask->status.taskStatus)); - taosMsleep(100); - } + tqDebug("s-task:%s fill-history task set status to be dropping", id); - // now we can stop the stream task execution - pStreamTask->status.taskStatus = TASK_STATUS__HALT; - tqDebug("s-task:%s level:%d status is set to halt by history scan task:%s", pStreamTask->id.idStr, - pStreamTask->info.taskLevel, pId); - - // if it's an source task, extract the last version in wal. 
- streamHistoryTaskSetVerRangeStep2(pTask); + streamMetaUnregisterTask(pMeta, pTask->id.taskId); + streamMetaReleaseTask(pMeta, pTask); + return -1; } - if (!streamTaskRecoverScanStep1Finished(pTask)) { - tqDebug("s-task:%s level:%d verRange:%" PRId64 " - %" PRId64 " do secondary scan-history-data after halt the related stream task:%s", - pId, pTask->info.taskLevel, pRange->minVer, pRange->maxVer, pId); - ASSERT(pTask->status.schedStatus == TASK_SCHED_STATUS__WAITING); + ASSERT(pStreamTask->info.taskLevel == TASK_LEVEL__SOURCE); - st = taosGetTimestampMs(); - streamSetParamForStreamScannerStep2(pTask, pRange, &pTask->dataRange.window); + // 2. it cannot be paused, when the stream task in TASK_STATUS__SCAN_HISTORY status. Let's wait for the + // stream task get ready for scan history data + while (pStreamTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY) { + tqDebug( + "s-task:%s level:%d related stream task:%s(status:%s) not ready for halt, wait for it and recheck in 100ms", + id, pTask->info.taskLevel, pStreamTask->id.idStr, streamGetTaskStatusStr(pStreamTask->status.taskStatus)); + taosMsleep(100); } - if (!streamTaskRecoverScanStep2Finished(pTask)) { - streamSourceScanHistoryData(pTask); + // now we can stop the stream task execution + streamTaskHalt(pStreamTask); - if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING || streamTaskShouldPause(&pTask->status)) { - tqDebug("s-task:%s is dropped or paused, abort recover in step1", pId); - streamMetaReleaseTask(pMeta, pTask); - return 0; - } + tqDebug("s-task:%s level:%d sched-status:%d is halt by fill-history task:%s", pStreamTask->id.idStr, + pStreamTask->info.taskLevel, pStreamTask->status.schedStatus, id); - streamTaskRecoverSetAllStepFinished(pTask); - } + // if it's an source task, extract the last version in wal. 
+ pRange = &pTask->dataRange.range; + int64_t latestVer = walReaderGetCurrentVer(pStreamTask->exec.pWalReader); + done = streamHistoryTaskSetVerRangeStep2(pTask, latestVer); - el = (taosGetTimestampMs() - st) / 1000.0; - tqDebug("s-task:%s history data scan stage(step 2) ended, elapsed time:%.2fs", pId, el); + if (done) { + pTask->tsInfo.step2Start = taosGetTimestampMs(); + streamTaskEndScanWAL(pTask); + streamMetaReleaseTask(pMeta, pTask); + } else { + STimeWindow* pWindow = &pTask->dataRange.window; + tqDebug("s-task:%s level:%d verRange:%" PRId64 " - %" PRId64 " window:%" PRId64 "-%" PRId64 + ", do secondary scan-history from WAL after halt the related stream task:%s", + id, pTask->info.taskLevel, pRange->minVer, pRange->maxVer, pWindow->skey, pWindow->ekey, + pStreamTask->id.idStr); + ASSERT(pTask->status.schedStatus == TASK_SCHED_STATUS__WAITING); - // 3. notify downstream tasks to transfer executor state after handle all history blocks. - if (!pTask->status.transferState) { - code = streamDispatchTransferStateMsg(pTask); - if (code != TSDB_CODE_SUCCESS) { - // todo handle error - } + pTask->tsInfo.step2Start = taosGetTimestampMs(); + streamSetParamForStreamScannerStep2(pTask, pRange, pWindow); - pTask->status.transferState = true; - } + int64_t dstVer = pTask->dataRange.range.minVer - 1; - // 4. 1) transfer the ownership of executor state, 2) update the scan data range for source task. - // 5. resume the related stream task. 
- streamTryExec(pTask); + pTask->chkInfo.currentVer = dstVer; + walReaderSetSkipToVersion(pTask->exec.pWalReader, dstVer); + tqDebug("s-task:%s wal reader start scan WAL verRange:%" PRId64 "-%" PRId64 ", set sched-status:%d", id, dstVer, + pTask->dataRange.range.maxVer, TASK_SCHED_STATUS__INACTIVE); - pTask->status.taskStatus = TASK_STATUS__DROPPING; - tqDebug("s-task:%s scan-history-task set status to be dropping", pId); + atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); - streamMetaSaveTask(pMeta, pTask); - streamMetaSaveTask(pMeta, pStreamTask); + // set the fill-history task to be normal + if (pTask->info.fillHistory == 1) { + streamSetStatusNormal(pTask); + } - streamMetaReleaseTask(pMeta, pTask); - streamMetaReleaseTask(pMeta, pStreamTask); + // 4. 1) transfer the ownership of executor state, 2) update the scan data range for source task. + // 5. resume the related stream task. + streamMetaReleaseTask(pMeta, pTask); + streamMetaReleaseTask(pMeta, pStreamTask); - taosWLockLatch(&pMeta->lock); - if (streamMetaCommit(pTask->pMeta) < 0) { - // persist to disk + tqStartStreamTasks(pTq); } - taosWUnLockLatch(&pMeta->lock); } else { // todo update the chkInfo version for current task. 
// this task has an associated history stream task, so we need to scan wal from the end version of @@ -1212,24 +1318,26 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { if (pTask->historyTaskId.taskId == 0) { *pWindow = (STimeWindow){INT64_MIN, INT64_MAX}; - tqDebug("s-task:%s no related scan-history-data task, reset the time window:%" PRId64 " - %" PRId64, pId, - pWindow->skey, pWindow->ekey); + tqDebug( + "s-task:%s scan-history in stream time window completed, no related fill-history task, reset the time " + "window:%" PRId64 " - %" PRId64, + id, pWindow->skey, pWindow->ekey); + qStreamInfoResetTimewindowFilter(pTask->exec.pExecutor); } else { + // when related fill-history task exists, update the fill-history time window only when the + // state transfer is completed. tqDebug( - "s-task:%s history data in current time window scan completed, now start to handle data from WAL, start " + "s-task:%s scan-history in stream time window completed, now start to handle data from WAL, start " "ver:%" PRId64 ", window:%" PRId64 " - %" PRId64, - pId, pTask->chkInfo.currentVer, pWindow->skey, pWindow->ekey); + id, pTask->chkInfo.currentVer, pWindow->skey, pWindow->ekey); } // notify the downstream agg tasks that upstream tasks are ready to processing the WAL data, update the code = streamTaskScanHistoryDataComplete(pTask); streamMetaReleaseTask(pMeta, pTask); - // let's start the stream task by extracting data from wal - if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { - tqStartStreamTasks(pTq); - } - + // when all source task complete to scan history data in stream time window, they are allowed to handle stream data + // at the same time. 
return code; } @@ -1248,35 +1356,25 @@ int32_t tqProcessTaskTransferStateReq(STQ* pTq, SRpcMsg* pMsg) { int32_t code = tDecodeStreamScanHistoryFinishReq(&decoder, &req); tDecoderClear(&decoder); - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.taskId); + tqDebug("vgId:%d start to process transfer state msg, from s-task:0x%x", pTq->pStreamMeta->vgId, req.downstreamTaskId); + + SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.downstreamTaskId); if (pTask == NULL) { - tqError("failed to find task:0x%x, it may have been dropped already", req.taskId); + tqError("failed to find task:0x%x, it may have been dropped already. process transfer state failed", req.downstreamTaskId); return -1; } int32_t remain = streamAlignTransferState(pTask); if (remain > 0) { - tqDebug("s-task:%s receive transfer state msg, remain:%d", pTask->id.idStr, remain); + tqDebug("s-task:%s receive upstream transfer state msg, remain:%d", pTask->id.idStr, remain); + streamMetaReleaseTask(pTq->pStreamMeta, pTask); return 0; } // transfer the ownership of executor state - tqDebug("s-task:%s all upstream tasks end transfer msg", pTask->id.idStr); + tqDebug("s-task:%s all upstream tasks send transfer msg, open transfer state flag", pTask->id.idStr); + ASSERT(pTask->streamTaskId.taskId != 0 && pTask->info.fillHistory == 1); - // related stream task load the state from the state storage backend - SStreamTask* pStreamTask = streamMetaAcquireTask(pTq->pStreamMeta, pTask->streamTaskId.taskId); - if (pStreamTask == NULL) { - streamMetaReleaseTask(pTq->pStreamMeta, pTask); - tqError("failed to find related stream task:0x%x, it may have been dropped already", req.taskId); - return -1; - } - - // when all upstream tasks have notified the this task to start transfer state, then we start the transfer procedure. 
- streamTaskReleaseState(pTask); - streamTaskReloadState(pStreamTask); - streamMetaReleaseTask(pTq->pStreamMeta, pStreamTask); - - ASSERT(pTask->streamTaskId.taskId != 0); pTask->status.transferState = true; streamSchedExec(pTask); @@ -1284,7 +1382,7 @@ int32_t tqProcessTaskTransferStateReq(STQ* pTq, SRpcMsg* pMsg) { return 0; } -int32_t tqProcessStreamTaskScanHistoryFinishReq(STQ* pTq, SRpcMsg* pMsg) { +int32_t tqProcessTaskScanHistoryFinishReq(STQ* pTq, SRpcMsg* pMsg) { char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); @@ -1296,20 +1394,51 @@ int32_t tqProcessStreamTaskScanHistoryFinishReq(STQ* pTq, SRpcMsg* pMsg) { tDecodeStreamScanHistoryFinishReq(&decoder, &req); tDecoderClear(&decoder); - // find task - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.taskId); + SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.downstreamTaskId); if (pTask == NULL) { - tqError("failed to find task:0x%x, it may be destroyed, vgId:%d", req.taskId, pTq->pStreamMeta->vgId); + tqError("vgId:%d process scan history finish msg, failed to find task:0x%x, it may be destroyed", + pTq->pStreamMeta->vgId, req.downstreamTaskId); return -1; } - int32_t code = streamProcessScanHistoryFinishReq(pTask, req.taskId, req.childId); + tqDebug("s-task:%s receive scan-history finish msg from task:0x%x", pTask->id.idStr, req.upstreamTaskId); + + int32_t code = streamProcessScanHistoryFinishReq(pTask, &req, &pMsg->info); streamMetaReleaseTask(pTq->pStreamMeta, pTask); return code; } -int32_t tqProcessTaskRecoverFinishRsp(STQ* pTq, SRpcMsg* pMsg) { - // +int32_t tqProcessTaskScanHistoryFinishRsp(STQ* pTq, SRpcMsg* pMsg) { + char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); + + // deserialize + SStreamCompleteHistoryMsg req = {0}; + + SDecoder decoder; + tDecoderInit(&decoder, (uint8_t*)msg, msgLen); + tDecodeCompleteHistoryDataMsg(&decoder, &req); + 
tDecoderClear(&decoder); + + SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.upstreamTaskId); + if (pTask == NULL) { + tqError("vgId:%d process scan history finish rsp, failed to find task:0x%x, it may be destroyed", + pTq->pStreamMeta->vgId, req.upstreamTaskId); + return -1; + } + + int32_t remain = atomic_sub_fetch_32(&pTask->notReadyTasks, 1); + if (remain > 0) { + tqDebug("s-task:%s scan-history finish rsp received from downstream task:0x%x, remain:%d not send finish rsp", + pTask->id.idStr, req.downstreamId, remain); + } else { + tqDebug( + "s-task:%s scan-history finish rsp received from downstream task:0x%x, all downstream tasks rsp scan-history " + "completed msg", pTask->id.idStr, req.downstreamId); + streamProcessScanHistoryFinishRsp(pTask); + } + + streamMetaReleaseTask(pTq->pStreamMeta, pTask); return 0; } @@ -1385,8 +1514,8 @@ int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) { SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId); if (pTask != NULL) { // even in halt status, the data in inputQ must be processed - int8_t status = pTask->status.taskStatus; - if (status == TASK_STATUS__NORMAL || status == TASK_STATUS__HALT) { + int8_t st = pTask->status.taskStatus; + if (st == TASK_STATUS__NORMAL || st == TASK_STATUS__SCAN_HISTORY/* || st == TASK_STATUS__SCAN_HISTORY_WAL*/) { tqDebug("vgId:%d s-task:%s start to process block from inputQ, last chk point:%" PRId64, vgId, pTask->id.idStr, pTask->chkInfo.version); streamProcessRunReq(pTask); @@ -1447,37 +1576,52 @@ int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg) { int32_t tqProcessTaskDropReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { SVDropStreamTaskReq* pReq = (SVDropStreamTaskReq*)msg; tqDebug("vgId:%d receive msg to drop stream task:0x%x", TD_VID(pTq->pVnode), pReq->taskId); - - streamMetaRemoveTask(pTq->pStreamMeta, pReq->taskId); - return 0; -} - -int32_t tqProcessTaskPauseImpl(SStreamMeta* pStreamMeta, SStreamTask* pTask) { - if (pTask) { - if 
(!streamTaskShouldPause(&pTask->status)) { - tqDebug("vgId:%d s-task:%s set pause flag", pStreamMeta->vgId, pTask->id.idStr); - atomic_store_8(&pTask->status.keepTaskStatus, pTask->status.taskStatus); - atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__PAUSE); - } - streamMetaReleaseTask(pStreamMeta, pTask); - } else { - return -1; + SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->taskId); + if (pTask == NULL) { + tqError("vgId:%d failed to acquire s-task:0x%x when dropping it", pTq->pStreamMeta->vgId, pReq->taskId); + return 0; } + + streamMetaUnregisterTask(pTq->pStreamMeta, pReq->taskId); + streamMetaReleaseTask(pTq->pStreamMeta, pTask); return 0; } int32_t tqProcessTaskPauseReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { SVPauseStreamTaskReq* pReq = (SVPauseStreamTaskReq*)msg; - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->taskId); - int32_t code = tqProcessTaskPauseImpl(pTq->pStreamMeta, pTask); - if (code != 0) { - return code; + + SStreamMeta* pMeta = pTq->pStreamMeta; + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->taskId); + if (pTask == NULL) { + tqError("vgId:%d process pause req, failed to acquire task:0x%x, it may have been dropped already", pMeta->vgId, + pReq->taskId); + // since task is in [STOP|DROPPING] state, it is safe to assume the pause is active + return TSDB_CODE_SUCCESS; } - SStreamTask* pHistoryTask = streamMetaAcquireTask(pTq->pStreamMeta, pTask->historyTaskId.taskId); - if (pHistoryTask) { - code = tqProcessTaskPauseImpl(pTq->pStreamMeta, pHistoryTask); + + tqDebug("s-task:%s receive pause msg from mnode", pTask->id.idStr); + streamTaskPause(pTask); + + SStreamTask* pHistoryTask = NULL; + if (pTask->historyTaskId.taskId != 0) { + pHistoryTask = streamMetaAcquireTask(pMeta, pTask->historyTaskId.taskId); + if (pHistoryTask == NULL) { + tqError("vgId:%d process pause req, failed to acquire fill-history task:0x%x, it may have been dropped already", + pMeta->vgId, 
pTask->historyTaskId.taskId); + streamMetaReleaseTask(pMeta, pTask); + + // since task is in [STOP|DROPPING] state, it is safe to assume the pause is active + return TSDB_CODE_SUCCESS; + } + + tqDebug("s-task:%s fill-history task handle paused along with related stream task", pHistoryTask->id.idStr); + + streamTaskPause(pHistoryTask); + streamMetaReleaseTask(pMeta, pHistoryTask); } - return code; + + streamMetaReleaseTask(pMeta, pTask); + return TSDB_CODE_SUCCESS; } int32_t tqProcessTaskResumeImpl(STQ* pTq, SStreamTask* pTask, int64_t sversion, int8_t igUntreated) { @@ -1486,11 +1630,14 @@ int32_t tqProcessTaskResumeImpl(STQ* pTq, SStreamTask* pTask, int64_t sversion, return -1; } - if (streamTaskShouldPause(&pTask->status)) { - atomic_store_8(&pTask->status.taskStatus, pTask->status.keepTaskStatus); + // todo: handle the case: resume from halt to pause/ from halt to normal/ from pause to normal + streamTaskResume(pTask); + int32_t level = pTask->info.taskLevel; + int8_t status = pTask->status.taskStatus; + if (status == TASK_STATUS__NORMAL || status == TASK_STATUS__SCAN_HISTORY) { // no lock needs to secure the access of the version - if (igUntreated && pTask->info.taskLevel == TASK_LEVEL__SOURCE && !pTask->info.fillHistory) { + if (igUntreated && level == TASK_LEVEL__SOURCE && !pTask->info.fillHistory) { // discard all the data when the stream task is suspended. 
walReaderSetSkipToVersion(pTask->exec.pWalReader, sversion); tqDebug("vgId:%d s-task:%s resume to exec, prev paused version:%" PRId64 ", start from vnode ver:%" PRId64 @@ -1501,9 +1648,9 @@ int32_t tqProcessTaskResumeImpl(STQ* pTq, SStreamTask* pTask, int64_t sversion, vgId, pTask->id.idStr, pTask->chkInfo.currentVer, sversion, pTask->status.schedStatus); } - if (pTask->info.fillHistory && pTask->info.taskLevel == TASK_LEVEL__SOURCE) { - streamStartRecoverTask(pTask, igUntreated); - } else if (pTask->info.taskLevel == TASK_LEVEL__SOURCE && taosQueueItemSize(pTask->inputQueue->queue) == 0) { + if (level == TASK_LEVEL__SOURCE && pTask->info.fillHistory && pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY) { + streamStartScanHistoryAsync(pTask, igUntreated); + } else if (level == TASK_LEVEL__SOURCE && (taosQueueItemSize(pTask->inputQueue->queue) == 0)) { tqStartStreamTasks(pTq); } else { streamSchedExec(pTask); @@ -1580,9 +1727,8 @@ int32_t vnodeEnqueueStreamMsg(SVnode* pVnode, SRpcMsg* pMsg) { tDecoderClear(&decoder); int32_t taskId = req.taskId; - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId); - if (pTask) { + if (pTask != NULL) { SRpcMsg rsp = {.info = pMsg->info, .code = 0}; streamProcessDispatchMsg(pTask, &req, &rsp, false); streamMetaReleaseTask(pTq->pStreamMeta, pTask); @@ -1590,18 +1736,22 @@ int32_t vnodeEnqueueStreamMsg(SVnode* pVnode, SRpcMsg* pMsg) { taosFreeQitem(pMsg); return 0; } else { + tDeleteStreamDispatchReq(&req); } code = TSDB_CODE_STREAM_TASK_NOT_EXIST; FAIL: - if (pMsg->info.handle == NULL) return -1; + if (pMsg->info.handle == NULL) { + tqError("s-task:0x%x vgId:%d msg handle is null, abort enqueue dispatch msg", pTq->pStreamMeta->vgId, taskId); + return -1; + } SMsgHead* pRspHead = rpcMallocCont(sizeof(SMsgHead) + sizeof(SStreamDispatchRsp)); if (pRspHead == NULL) { SRpcMsg rsp = {.code = TSDB_CODE_OUT_OF_MEMORY, .info = pMsg->info}; - tqDebug("send dispatch error rsp, code: %x", code); + tqError("s-task:0x%x send 
dispatch error rsp, code:%s", taskId, tstrerror(code)); tmsgSendRsp(&rsp); rpcFreeCont(pMsg->pCont); taosFreeQitem(pMsg); @@ -1617,9 +1767,10 @@ FAIL: pRsp->downstreamTaskId = htonl(req.taskId); pRsp->inputStatus = TASK_OUTPUT_STATUS__NORMAL; - SRpcMsg rsp = { - .code = code, .info = pMsg->info, .contLen = sizeof(SMsgHead) + sizeof(SStreamDispatchRsp), .pCont = pRspHead}; - tqDebug("send dispatch error rsp, code: %x", code); + int32_t len = sizeof(SMsgHead) + sizeof(SStreamDispatchRsp); + SRpcMsg rsp = { .code = code, .info = pMsg->info, .contLen = len, .pCont = pRspHead}; + tqError("s-task:0x%x send dispatch error rsp, code:%s", taskId, tstrerror(code)); + tmsgSendRsp(&rsp); rpcFreeCont(pMsg->pCont); taosFreeQitem(pMsg); diff --git a/source/dnode/vnode/src/tq/tqMeta.c b/source/dnode/vnode/src/tq/tqMeta.c index df1c9ca7c9cb82b7266604efb74b24fe86a88461..08019f8a76050d6ba6d27186f66b7080eba9fc9f 100644 --- a/source/dnode/vnode/src/tq/tqMeta.c +++ b/source/dnode/vnode/src/tq/tqMeta.c @@ -338,7 +338,7 @@ static int buildHandle(STQ* pTq, STqHandle* handle){ taosArrayDestroy(tbUidList); return -1; } - tqDebug("vgId:%d, tq try to get ctb for stb subscribe, suid:%" PRId64, pVnode->config.vgId, handle->execHandle.execTb.suid); + tqInfo("vgId:%d, tq try to get ctb for stb subscribe, suid:%" PRId64, pVnode->config.vgId, handle->execHandle.execTb.suid); handle->execHandle.pTqReader = tqReaderOpen(pVnode); tqReaderSetTbUidList(handle->execHandle.pTqReader, tbUidList, NULL); taosArrayDestroy(tbUidList); @@ -356,7 +356,7 @@ static int restoreHandle(STQ* pTq, void* pVal, int vLen, STqHandle* handle){ if(buildHandle(pTq, handle) < 0){ return -1; } - tqDebug("tq restore %s consumer %" PRId64 " vgId:%d", handle->subKey, handle->consumerId, vgId); + tqInfo("tq restore %s consumer %" PRId64 " vgId:%d", handle->subKey, handle->consumerId, vgId); return taosHashPut(pTq->pHandle, handle->subKey, strlen(handle->subKey), handle, sizeof(STqHandle)); } @@ -384,7 +384,7 @@ int32_t 
tqCreateHandle(STQ* pTq, SMqRebVgReq* req, STqHandle* handle){ if(buildHandle(pTq, handle) < 0){ return -1; } - tqDebug("tq restore %s consumer %" PRId64 " vgId:%d", handle->subKey, handle->consumerId, vgId); + tqInfo("tq restore %s consumer %" PRId64 " vgId:%d", handle->subKey, handle->consumerId, vgId); return taosHashPut(pTq->pHandle, handle->subKey, strlen(handle->subKey), handle, sizeof(STqHandle)); } diff --git a/source/dnode/vnode/src/tq/tqPush.c b/source/dnode/vnode/src/tq/tqPush.c index 06af53d453cb4e189b82bcaff9c17c0e81936ed5..a236b9861461d86756b0e86b8295ccdafce52de7 100644 --- a/source/dnode/vnode/src/tq/tqPush.c +++ b/source/dnode/vnode/src/tq/tqPush.c @@ -35,7 +35,10 @@ int32_t tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t v tqProcessSubmitReqForSubscribe(pTq); } + taosRLockLatch(&pTq->pStreamMeta->lock); int32_t numOfTasks = streamMetaGetNumOfTasks(pTq->pStreamMeta); + taosRUnLockLatch(&pTq->pStreamMeta->lock); + tqDebug("handle submit, restore:%d, size:%d", pTq->pVnode->restored, numOfTasks); // push data for stream processing: @@ -75,12 +78,12 @@ int32_t tqRegisterPushHandle(STQ* pTq, void* handle, SRpcMsg* pMsg) { memcpy(pHandle->msg->pCont, pMsg->pCont, pMsg->contLen); pHandle->msg->contLen = pMsg->contLen; int32_t ret = taosHashPut(pTq->pPushMgr, pHandle->subKey, strlen(pHandle->subKey), &pHandle, POINTER_BYTES); - tqDebug("vgId:%d data is over, ret:%d, consumerId:0x%" PRIx64 ", register to pHandle:%p, pCont:%p, len:%d", vgId, ret, + tqInfo("vgId:%d data is over, ret:%d, consumerId:0x%" PRIx64 ", register to pHandle:%p, pCont:%p, len:%d", vgId, ret, pHandle->consumerId, pHandle, pHandle->msg->pCont, pHandle->msg->contLen); return 0; } -int32_t tqUnregisterPushHandle(STQ* pTq, void *handle) { +int tqUnregisterPushHandle(STQ* pTq, void *handle) { STqHandle *pHandle = (STqHandle*)handle; int32_t vgId = TD_VID(pTq->pVnode); @@ -88,7 +91,7 @@ int32_t tqUnregisterPushHandle(STQ* pTq, void *handle) { return 0; } int32_t ret = 
taosHashRemove(pTq->pPushMgr, pHandle->subKey, strlen(pHandle->subKey)); - tqDebug("vgId:%d remove pHandle:%p,ret:%d consumer Id:0x%" PRIx64, vgId, pHandle, ret, pHandle->consumerId); + tqInfo("vgId:%d remove pHandle:%p,ret:%d consumer Id:0x%" PRIx64, vgId, pHandle, ret, pHandle->consumerId); if(pHandle->msg != NULL) { // tqPushDataRsp(pHandle, vgId); diff --git a/source/dnode/vnode/src/tq/tqRead.c b/source/dnode/vnode/src/tq/tqRead.c index ba983b1833fa04ca26d6093d07f9f85eafef36ec..9b8f1781cb47182156d7834adb1cbb2c5fbf2050 100644 --- a/source/dnode/vnode/src/tq/tqRead.c +++ b/source/dnode/vnode/src/tq/tqRead.c @@ -196,7 +196,7 @@ int32_t tqFetchLog(STQ* pTq, STqHandle* pHandle, int64_t* fetchOffset, SWalCkHea tqDebug("tmq poll: consumer:0x%" PRIx64 ", (epoch %d) vgId:%d offset %" PRId64 ", no more log to return, reqId:0x%" PRIx64, pHandle->consumerId, pHandle->epoch, vgId, offset, reqId); - *fetchOffset = offset - 1; + *fetchOffset = offset; code = -1; goto END; } @@ -302,13 +302,17 @@ int32_t tqReaderSeek(STqReader* pReader, int64_t ver, const char* id) { return 0; } -int32_t extractMsgFromWal(SWalReader* pReader, void** pItem, const char* id) { +int32_t extractMsgFromWal(SWalReader* pReader, void** pItem, int64_t maxVer, const char* id) { int32_t code = walNextValidMsg(pReader); if (code != TSDB_CODE_SUCCESS) { return code; } int64_t ver = pReader->pHead->head.version; + if (ver > maxVer) { + tqDebug("maxVer in WAL:%"PRId64" reached current:%"PRId64", do not scan wal anymore, %s", maxVer, ver, id); + return TSDB_CODE_SUCCESS; + } if (pReader->pHead->head.msgType == TDMT_VND_SUBMIT) { void* pBody = POINTER_SHIFT(pReader->pHead->head.body, sizeof(SSubmitReq2Msg)); @@ -336,6 +340,7 @@ int32_t extractMsgFromWal(SWalReader* pReader, void** pItem, const char* id) { int32_t len = pReader->pHead->head.bodyLen - sizeof(SMsgHead); extractDelDataBlock(pBody, len, ver, (SStreamRefDataBlock**)pItem); + tqDebug("s-task:%s delete msg extract from WAL, len:%d, ver:%"PRId64, id, 
len, ver); } else { ASSERT(0); } diff --git a/source/dnode/vnode/src/tq/tqRestore.c b/source/dnode/vnode/src/tq/tqRestore.c index 5db3e735ccbf63c41cc88372b09b9682060334c5..c3e7d03e4397f5d21c1866e3d2117646cfe5cf2e 100644 --- a/source/dnode/vnode/src/tq/tqRestore.c +++ b/source/dnode/vnode/src/tq/tqRestore.c @@ -38,9 +38,7 @@ int32_t tqStreamTasksScanWal(STQ* pTq) { if (shouldIdle) { taosWLockLatch(&pMeta->lock); - pMeta->walScanCounter -= 1; - times = pMeta->walScanCounter; - + times = (--pMeta->walScanCounter); ASSERT(pMeta->walScanCounter >= 0); if (pMeta->walScanCounter <= 0) { @@ -80,11 +78,17 @@ int32_t tqStreamTasksStatusCheck(STQ* pTq) { continue; } - streamTaskCheckDownstreamTasks(pTask); + if (pTask->info.fillHistory == 1) { + tqDebug("s-task:%s fill-history task, wait for related stream task:0x%x to launch it", pTask->id.idStr, + pTask->streamTaskId.taskId); + continue; + } + + streamTaskDoCheckDownstreamTasks(pTask); streamMetaReleaseTask(pMeta, pTask); } - taosArrayDestroy(pTaskList); + taosArrayDestroy(pTaskList); return 0; } @@ -205,6 +209,17 @@ int32_t doSetOffsetForWalReader(SStreamTask *pTask, int32_t vgId) { return TSDB_CODE_SUCCESS; } +static void checkForFillHistoryVerRange(SStreamTask* pTask, int64_t ver) { + if ((pTask->info.fillHistory == 1) && ver > pTask->dataRange.range.maxVer) { + qWarn("s-task:%s fill-history scan WAL, currentVer:%" PRId64 " reach the maximum ver:%" PRId64 + ", not scan wal anymore, set the transfer state flag", + pTask->id.idStr, ver, pTask->dataRange.range.maxVer); + pTask->status.transferState = true; + + /*int32_t code = */streamSchedExec(pTask); + } +} + int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) { *pScanIdle = true; bool noDataInWal = true; @@ -234,7 +249,9 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) { } int32_t status = pTask->status.taskStatus; - if (pTask->info.taskLevel != TASK_LEVEL__SOURCE) { + + // non-source or fill-history tasks don't need to 
response the WAL scan action. + if ((pTask->info.taskLevel != TASK_LEVEL__SOURCE) || (pTask->status.downstreamReady == 0)) { streamMetaReleaseTask(pStreamMeta, pTask); continue; } @@ -245,6 +262,15 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) { continue; } + if ((pTask->info.fillHistory == 1) && pTask->status.transferState) { + ASSERT(status == TASK_STATUS__NORMAL); + // the maximum version of data in the WAL has reached already, the step2 is done + tqDebug("s-task:%s fill-history reach the maximum ver:%" PRId64 ", not scan wal anymore", pTask->id.idStr, + pTask->dataRange.range.maxVer); + streamMetaReleaseTask(pStreamMeta, pTask); + continue; + } + if (tInputQueueIsFull(pTask)) { tqTrace("s-task:%s input queue is full, do nothing", pTask->id.idStr); streamMetaReleaseTask(pStreamMeta, pTask); @@ -261,12 +287,13 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) { } int32_t numOfItemsInQ = taosQueueItemSize(pTask->inputQueue->queue); + int64_t maxVer = (pTask->info.fillHistory == 1)? 
pTask->dataRange.range.maxVer:INT64_MAX; - // append the data for the stream SStreamQueueItem* pItem = NULL; - code = extractMsgFromWal(pTask->exec.pWalReader, (void**) &pItem, pTask->id.idStr); + code = extractMsgFromWal(pTask->exec.pWalReader, (void**) &pItem, maxVer, pTask->id.idStr); if ((code != TSDB_CODE_SUCCESS || pItem == NULL) && (numOfItemsInQ == 0)) { // failed, continue + checkForFillHistoryVerRange(pTask, walReaderGetCurrentVer(pTask->exec.pWalReader)); streamMetaReleaseTask(pStreamMeta, pTask); continue; } @@ -275,9 +302,10 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) { noDataInWal = false; code = tAppendDataToInputQueue(pTask, pItem); if (code == TSDB_CODE_SUCCESS) { - pTask->chkInfo.currentVer = walReaderGetCurrentVer(pTask->exec.pWalReader); - tqDebug("s-task:%s set the ver:%" PRId64 " from WALReader after extract block from WAL", pTask->id.idStr, - pTask->chkInfo.currentVer); + int64_t ver = walReaderGetCurrentVer(pTask->exec.pWalReader); + pTask->chkInfo.currentVer = ver; + checkForFillHistoryVerRange(pTask, ver); + tqDebug("s-task:%s set the ver:%" PRId64 " from WALReader after extract block from WAL", pTask->id.idStr, ver); } else { tqError("s-task:%s append input queue failed, too many in inputQ, ver:%" PRId64, pTask->id.idStr, pTask->chkInfo.currentVer); diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index 11bfcf7fc528e0426043eb08df00dc395e923a25..55a1cecafe6f8ea949c04598a7bc262ee5f043ce 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -20,21 +20,6 @@ static int32_t tqSendMetaPollRsp(STqHandle* pHandle, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqMetaRsp* pRsp, int32_t vgId); -int32_t tqAddInputBlockNLaunchTask(SStreamTask* pTask, SStreamQueueItem* pQueueItem) { - int32_t code = tAppendDataToInputQueue(pTask, pQueueItem); - if (code < 0) { - tqError("s-task:%s failed to put into queue, too many", pTask->id.idStr); - 
return -1; - } - - if (streamSchedExec(pTask) < 0) { - tqError("stream task:%d failed to be launched, code:%s", pTask->id.taskId, tstrerror(terrno)); - return -1; - } - - return TSDB_CODE_SUCCESS; -} - int32_t tqInitDataRsp(SMqDataRsp* pRsp, const SMqPollReq* pReq) { pRsp->reqOffset = pReq->reqOffset; @@ -113,14 +98,14 @@ static int32_t extractResetOffsetVal(STqOffsetVal* pOffsetVal, STQ* pTq, STqHand } } else { walRefFirstVer(pTq->pVnode->pWal, pHandle->pRef); - tqOffsetResetToLog(pOffsetVal, pHandle->pRef->refVer - 1); + tqOffsetResetToLog(pOffsetVal, pHandle->pRef->refVer); } } else if (reqOffset.type == TMQ_OFFSET__RESET_LATEST) { walRefLastVer(pTq->pVnode->pWal, pHandle->pRef); SMqDataRsp dataRsp = {0}; tqInitDataRsp(&dataRsp, pRequest); - tqOffsetResetToLog(&dataRsp.rspOffset, pHandle->pRef->refVer); + tqOffsetResetToLog(&dataRsp.rspOffset, pHandle->pRef->refVer + 1); tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, (latest) offset reset to %" PRId64, consumerId, pHandle->subKey, vgId, dataRsp.rspOffset.version); int32_t code = tqSendDataRsp(pHandle, pMsg, pRequest, &dataRsp, TMQ_MSG_TYPE__POLL_DATA_RSP, vgId); @@ -140,18 +125,24 @@ static int32_t extractResetOffsetVal(STqOffsetVal* pOffsetVal, STQ* pTq, STqHand return 0; } +static void setRequestVersion(STqOffsetVal* offset, int64_t ver){ + if(offset->type == TMQ_OFFSET__LOG){ + offset->version = ver + 1; + } +} + static int32_t extractDataAndRspForNormalSubscribe(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg, STqOffsetVal* pOffset) { uint64_t consumerId = pRequest->consumerId; int32_t vgId = TD_VID(pTq->pVnode); - int code = 0; terrno = 0; SMqDataRsp dataRsp = {0}; tqInitDataRsp(&dataRsp, pRequest); + dataRsp.reqOffset.type = pOffset->type; // stroe origin type for getting offset in tmq_get_vgroup_offset qSetTaskId(pHandle->execHandle.task, consumerId, pRequest->reqId); - code = tqScanData(pTq, pHandle, &dataRsp, pOffset); + int code = tqScanData(pTq, pHandle, 
&dataRsp, pOffset); if (code != 0 && terrno != TSDB_CODE_WAL_LOG_NOT_EXIST) { goto end; } @@ -166,11 +157,11 @@ static int32_t extractDataAndRspForNormalSubscribe(STQ* pTq, STqHandle* pHandle, code = tqRegisterPushHandle(pTq, pHandle, pMsg); taosWUnLockLatch(&pTq->lock); goto end; - } else { - taosWUnLockLatch(&pTq->lock); } + taosWUnLockLatch(&pTq->lock); } + setRequestVersion(&dataRsp.reqOffset, pOffset->version); code = tqSendDataRsp(pHandle, pMsg, pRequest, (SMqDataRsp*)&dataRsp, TMQ_MSG_TYPE__POLL_DATA_RSP, vgId); end : { @@ -192,6 +183,7 @@ static int32_t extractDataAndRspForDbStbSubscribe(STQ* pTq, STqHandle* pHandle, SMqMetaRsp metaRsp = {0}; STaosxRsp taosxRsp = {0}; tqInitTaosxRsp(&taosxRsp, pRequest); + taosxRsp.reqOffset.type = offset->type; // store origin type for getting offset in tmq_get_vgroup_offset if (offset->type != TMQ_OFFSET__LOG) { if (tqScanTaosx(pTq, pHandle, &taosxRsp, &metaRsp, offset) < 0) { @@ -223,7 +215,7 @@ static int32_t extractDataAndRspForDbStbSubscribe(STQ* pTq, STqHandle* pHandle, if (offset->type == TMQ_OFFSET__LOG) { walReaderVerifyOffset(pHandle->pWalReader, offset); - int64_t fetchVer = offset->version + 1; + int64_t fetchVer = offset->version; pCkHead = taosMemoryMalloc(sizeof(SWalCkHead) + 2048); if (pCkHead == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -244,6 +236,7 @@ static int32_t extractDataAndRspForDbStbSubscribe(STQ* pTq, STqHandle* pHandle, if (tqFetchLog(pTq, pHandle, &fetchVer, &pCkHead, pRequest->reqId) < 0) { tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer); + setRequestVersion(&taosxRsp.reqOffset, offset->version); code = tqSendDataRsp(pHandle, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__POLL_DATA_RSP, vgId); goto end; } @@ -255,13 +248,14 @@ static int32_t extractDataAndRspForDbStbSubscribe(STQ* pTq, STqHandle* pHandle, // process meta if (pHead->msgType != TDMT_VND_SUBMIT) { if (totalRows > 0) { - tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer - 1); + tqOffsetResetToLog(&taosxRsp.rspOffset, 
fetchVer); + setRequestVersion(&taosxRsp.reqOffset, offset->version); code = tqSendDataRsp(pHandle, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__POLL_DATA_RSP, vgId); goto end; } tqDebug("fetch meta msg, ver:%" PRId64 ", type:%s", pHead->version, TMSG_INFO(pHead->msgType)); - tqOffsetResetToLog(&metaRsp.rspOffset, fetchVer); + tqOffsetResetToLog(&metaRsp.rspOffset, fetchVer + 1); metaRsp.resMsgType = pHead->msgType; metaRsp.metaRspLen = pHead->bodyLen; metaRsp.metaRsp = pHead->body; @@ -284,7 +278,8 @@ static int32_t extractDataAndRspForDbStbSubscribe(STQ* pTq, STqHandle* pHandle, } if (totalRows >= 4096 || taosxRsp.createTableNum > 0) { - tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer); + tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer + 1); + setRequestVersion(&taosxRsp.reqOffset, offset->version); code = tqSendDataRsp(pHandle, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, taosxRsp.createTableNum > 0 ? TMQ_MSG_TYPE__POLL_DATA_META_RSP : TMQ_MSG_TYPE__POLL_DATA_RSP, vgId); goto end; } else { @@ -318,9 +313,12 @@ int32_t tqExtractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequ if (blockReturned) { return 0; } - } else { // use the consumer specified offset + } else if(reqOffset.type != 0){ // use the consumer specified offset // the offset value can not be monotonious increase?? offset = reqOffset; + } else { + uError("req offset type is 0"); + return TSDB_CODE_TMQ_INVALID_MSG; } // this is a normal subscribe requirement diff --git a/source/dnode/vnode/src/tsdb/tsdbCache.c b/source/dnode/vnode/src/tsdb/tsdbCache.c index d0986b25f02c8e7ccf3c888ba446e2b402d21a56..7d8cf5b67852c689920ef610a9c9e4442bfb5956 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCache.c +++ b/source/dnode/vnode/src/tsdb/tsdbCache.c @@ -13,6 +13,8 @@ * along with this program. If not, see . 
*/ #include "tsdb.h" +#include "tsdbDataFileRW.h" +#include "tsdbReadUtil.h" #include "vnd.h" #define ROCKS_BATCH_SIZE (4096) @@ -1018,63 +1020,15 @@ int32_t tsdbCacheGetBatch(STsdb *pTsdb, tb_uid_t uid, SArray *pLastArray, SCache code = tsdbCacheLoadFromRocks(pTsdb, uid, pLastArray, remainCols, pr, ltype); taosThreadMutexUnlock(&pTsdb->lruMutex); - } - - if (remainCols) { - taosArrayDestroy(remainCols); - } - - return code; -} -/* -int32_t tsdbCacheGet(STsdb *pTsdb, tb_uid_t uid, SArray *pLastArray, SCacheRowsReader *pr, int8_t ltype) { - int32_t code = 0; - SLRUCache *pCache = pTsdb->lruCache; - SArray *pCidList = pr->pCidList; - int num_keys = TARRAY_SIZE(pCidList); - - for (int i = 0; i < num_keys; ++i) { - SLastCol *pLastCol = NULL; - int16_t cid = *(int16_t *)taosArrayGet(pCidList, i); - - SLastKey *key = &(SLastKey){.ltype = ltype, .uid = uid, .cid = cid}; - LRUHandle *h = taosLRUCacheLookup(pCache, key, ROCKS_KEY_LEN); - if (!h) { - taosThreadMutexLock(&pTsdb->lruMutex); - h = taosLRUCacheLookup(pCache, key, ROCKS_KEY_LEN); - if (!h) { - pLastCol = tsdbCacheLoadCol(pTsdb, pr, pr->pSlotIds[i], uid, cid, ltype); - - size_t charge = sizeof(*pLastCol); - if (IS_VAR_DATA_TYPE(pLastCol->colVal.type)) { - charge += pLastCol->colVal.value.nData; - } - - LRUStatus status = taosLRUCacheInsert(pCache, key, ROCKS_KEY_LEN, pLastCol, charge, tsdbCacheDeleter, &h, - TAOS_LRU_PRIORITY_LOW, &pTsdb->flushState); - if (status != TAOS_LRU_STATUS_OK) { - code = -1; - } - } - - taosThreadMutexUnlock(&pTsdb->lruMutex); - } - - pLastCol = (SLastCol *)taosLRUCacheValue(pCache, h); - SLastCol lastCol = *pLastCol; - reallocVarData(&lastCol.colVal); - - if (h) { - taosLRUCacheRelease(pCache, h, false); + if (remainCols) { + taosArrayDestroy(remainCols); } - - taosArrayPush(pLastArray, &lastCol); } return code; } -*/ + int32_t tsdbCacheDel(STsdb *pTsdb, tb_uid_t suid, tb_uid_t uid, TSKEY sKey, TSKEY eKey) { int32_t code = 0; // fetch schema @@ -1106,6 +1060,7 @@ int32_t 
tsdbCacheDel(STsdb *pTsdb, tb_uid_t suid, tb_uid_t uid, TSKEY sKey, TSKE char **values_list = taosMemoryCalloc(num_keys * 2, sizeof(char *)); size_t *values_list_sizes = taosMemoryCalloc(num_keys * 2, sizeof(size_t)); char **errs = taosMemoryCalloc(num_keys * 2, sizeof(char *)); + taosThreadMutexLock(&pTsdb->lruMutex); taosThreadMutexLock(&pTsdb->rCache.rMutex); rocksMayWrite(pTsdb, true, false, false); rocksdb_multi_get(pTsdb->rCache.db, pTsdb->rCache.readoptions, num_keys * 2, (const char *const *)keys_list, @@ -1135,7 +1090,7 @@ int32_t tsdbCacheDel(STsdb *pTsdb, tb_uid_t suid, tb_uid_t uid, TSKEY sKey, TSKE rocksdb_free(values_list[i]); rocksdb_free(values_list[i + num_keys]); - taosThreadMutexLock(&pTsdb->lruMutex); + // taosThreadMutexLock(&pTsdb->lruMutex); LRUHandle *h = taosLRUCacheLookup(pTsdb->lruCache, keys_list[i], klen); if (h) { @@ -1157,7 +1112,7 @@ int32_t tsdbCacheDel(STsdb *pTsdb, tb_uid_t suid, tb_uid_t uid, TSKEY sKey, TSKE } taosLRUCacheErase(pTsdb->lruCache, keys_list[num_keys + i], klen); - taosThreadMutexUnlock(&pTsdb->lruMutex); + // taosThreadMutexUnlock(&pTsdb->lruMutex); } for (int i = 0; i < num_keys; ++i) { taosMemoryFree(keys_list[i]); @@ -1169,6 +1124,8 @@ int32_t tsdbCacheDel(STsdb *pTsdb, tb_uid_t suid, tb_uid_t uid, TSKEY sKey, TSKE rocksMayWrite(pTsdb, true, false, true); + taosThreadMutexUnlock(&pTsdb->lruMutex); + _exit: taosMemoryFree(pTSchema); @@ -1309,62 +1266,7 @@ int32_t tsdbCacheDeleteLast(SLRUCache *pCache, tb_uid_t uid, TSKEY eKey) { return code; } -/* -int32_t tsdbCacheDelete(SLRUCache *pCache, tb_uid_t uid, TSKEY eKey) { - int32_t code = 0; - char key[32] = {0}; - int keyLen = 0; - - // getTableCacheKey(uid, "lr", key, &keyLen); - getTableCacheKey(uid, 0, key, &keyLen); - LRUHandle *h = taosLRUCacheLookup(pCache, key, keyLen); - if (h) { - SArray *pLast = (SArray *)taosLRUCacheValue(pCache, h); - bool invalidate = false; - int16_t nCol = taosArrayGetSize(pLast); - - for (int16_t iCol = 0; iCol < nCol; ++iCol) { - 
SLastCol *tTsVal = (SLastCol *)taosArrayGet(pLast, iCol); - if (eKey >= tTsVal->ts) { - invalidate = true; - break; - } - } - - if (invalidate) { - taosLRUCacheRelease(pCache, h, true); - } else { - taosLRUCacheRelease(pCache, h, false); - } - } - - // getTableCacheKey(uid, "l", key, &keyLen); - getTableCacheKey(uid, 1, key, &keyLen); - h = taosLRUCacheLookup(pCache, key, keyLen); - if (h) { - SArray *pLast = (SArray *)taosLRUCacheValue(pCache, h); - bool invalidate = false; - int16_t nCol = taosArrayGetSize(pLast); - - for (int16_t iCol = 0; iCol < nCol; ++iCol) { - SLastCol *tTsVal = (SLastCol *)taosArrayGet(pLast, iCol); - if (eKey >= tTsVal->ts) { - invalidate = true; - break; - } - } - - if (invalidate) { - taosLRUCacheRelease(pCache, h, true); - } else { - taosLRUCacheRelease(pCache, h, false); - } - // void taosLRUCacheErase(SLRUCache * cache, const void *key, size_t keyLen); - } - return code; -} -*/ int32_t tsdbCacheInsertLastrow(SLRUCache *pCache, STsdb *pTsdb, tb_uid_t uid, TSDBROW *row, bool dup) { int32_t code = 0; STSRow *cacheRow = NULL; @@ -1689,533 +1591,587 @@ _err: } return code; } -/* -static int32_t getTableDelIdx(SDelFReader *pDelFReader, tb_uid_t suid, tb_uid_t uid, SDelIdx *pDelIdx) { - int32_t code = 0; - SArray *pDelIdxArray = NULL; - // SMapData delIdxMap; - pDelIdxArray = taosArrayInit(32, sizeof(SDelIdx)); - SDelIdx idx = {.suid = suid, .uid = uid}; +static void freeTableInfoFunc(void *param) { + void **p = (void **)param; + taosMemoryFreeClear(*p); +} - // tMapDataReset(&delIdxMap); - code = tsdbReadDelIdx(pDelFReader, pDelIdxArray); - if (code) goto _err; +static STableLoadInfo *getTableLoadInfo(SCacheRowsReader *pReader, uint64_t uid) { + if (!pReader->pTableMap) { + pReader->pTableMap = tSimpleHashInit(pReader->numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT)); - // code = tMapDataSearch(&delIdxMap, &idx, tGetDelIdx, tCmprDelIdx, pDelIdx); - SDelIdx *pIdx = taosArraySearch(pDelIdxArray, &idx, tCmprDelIdx, TD_EQ); + 
tSimpleHashSetFreeFp(pReader->pTableMap, freeTableInfoFunc); + } - *pDelIdx = *pIdx; + STableLoadInfo *pInfo = NULL; + STableLoadInfo **ppInfo = tSimpleHashGet(pReader->pTableMap, &uid, sizeof(uid)); + if (!ppInfo) { + pInfo = taosMemoryCalloc(1, sizeof(STableLoadInfo)); + tSimpleHashPut(pReader->pTableMap, &uid, sizeof(uint64_t), &pInfo, POINTER_BYTES); -_err: - if (pDelIdxArray) { - taosArrayDestroy(pDelIdxArray); + return pInfo; } - return code; + + return *ppInfo; } -*/ -typedef enum { - SFSLASTNEXTROW_FS, - SFSLASTNEXTROW_FILESET, - SFSLASTNEXTROW_BLOCKDATA, - SFSLASTNEXTROW_BLOCKROW -} SFSLASTNEXTROWSTATES; -typedef struct { - SFSLASTNEXTROWSTATES state; // [input] - STsdb *pTsdb; // [input] - STSchema *pTSchema; // [input] - tb_uid_t suid; - tb_uid_t uid; - int32_t nFileSet; - int32_t iFileSet; - SArray *aDFileSet; - SDataFReader **pDataFReader; - TSDBROW row; - - bool checkRemainingRow; - SMergeTree mergeTree; - SMergeTree *pMergeTree; - SSttBlockLoadInfo *pLoadInfo; - SLDataIter *pDataIter; - int64_t lastTs; -} SFSLastNextRowIter; - -static int32_t getNextRowFromFSLast(void *iter, TSDBROW **ppRow, bool *pIgnoreEarlierTs, bool isLast, int16_t *aCols, - int nCols) { - SFSLastNextRowIter *state = (SFSLastNextRowIter *)iter; - int32_t code = 0; - bool checkRemainingRow = true; +static uint64_t *getUidList(SCacheRowsReader *pReader) { + if (!pReader->uidList) { + int32_t numOfTables = pReader->numOfTables; - switch (state->state) { - case SFSLASTNEXTROW_FS: - state->nFileSet = taosArrayGetSize(state->aDFileSet); - state->iFileSet = state->nFileSet; + pReader->uidList = taosMemoryMalloc(numOfTables * sizeof(uint64_t)); - case SFSLASTNEXTROW_FILESET: { - SDFileSet *pFileSet = NULL; - _next_fileset: - if (state->pMergeTree != NULL) { - tMergeTreeClose(state->pMergeTree); - state->pMergeTree = NULL; - } + for (int32_t i = 0; i < numOfTables; ++i) { + uint64_t uid = pReader->pTableList[i].uid; + pReader->uidList[i] = uid; + } - if (--state->iFileSet >= 0) { - 
pFileSet = (SDFileSet *)taosArrayGet(state->aDFileSet, state->iFileSet); - } else { - *ppRow = NULL; - return code; - } + taosSort(pReader->uidList, numOfTables, sizeof(uint64_t), uidComparFunc); + } - if (*state->pDataFReader == NULL || (*state->pDataFReader)->pSet->fid != pFileSet->fid) { - if (*state->pDataFReader != NULL) { - tsdbDataFReaderClose(state->pDataFReader); + return pReader->uidList; +} - resetLastBlockLoadInfo(state->pLoadInfo); - } +static int32_t loadTombFromBlk(const TTombBlkArray *pTombBlkArray, SCacheRowsReader *pReader, void *pFileReader, + bool isFile) { + int32_t code = 0; + uint64_t *uidList = getUidList(pReader); + int32_t numOfTables = pReader->numOfTables; + int64_t suid = pReader->info.suid; - code = tsdbDataFReaderOpen(state->pDataFReader, state->pTsdb, pFileSet); - if (code) goto _err; + for (int i = 0, j = 0; i < pTombBlkArray->size && j < numOfTables; ++i) { + STombBlk *pTombBlk = &pTombBlkArray->data[i]; + if (pTombBlk->maxTbid.suid < suid || (pTombBlk->maxTbid.suid == suid && pTombBlk->maxTbid.uid < uidList[0])) { + continue; + } + + if (pTombBlk->minTbid.suid > suid || + (pTombBlk->minTbid.suid == suid && pTombBlk->minTbid.uid > uidList[numOfTables - 1])) { + break; + } + + STombBlock block = {0}; + code = isFile ? 
tsdbDataFileReadTombBlock(pFileReader, &pTombBlkArray->data[i], &block) + : tsdbSttFileReadTombBlock(pFileReader, &pTombBlkArray->data[i], &block); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + uint64_t uid = uidList[j]; + STableLoadInfo *pInfo = getTableLoadInfo(pReader, uid); + if (pInfo->pTombData == NULL) { + pInfo->pTombData = taosArrayInit(4, sizeof(SDelData)); + } + + STombRecord record = {0}; + bool finished = false; + for (int32_t k = 0; k < TARRAY2_SIZE(block.suid); ++k) { + code = tTombBlockGet(&block, k, &record); + if (code != TSDB_CODE_SUCCESS) { + finished = true; + break; } - int nTmpCols = nCols; - bool hasTs = false; - if (aCols[0] == PRIMARYKEY_TIMESTAMP_COL_ID) { - --nTmpCols; - hasTs = true; + if (record.suid < suid) { + continue; } - for (int i = 0; i < state->pLoadInfo->numOfStt; ++i) { - state->pLoadInfo[i].colIds = hasTs ? aCols + 1 : aCols; - state->pLoadInfo[i].numOfCols = nTmpCols; - state->pLoadInfo[i].isLast = isLast; + if (record.suid > suid) { + finished = true; + break; } - tMergeTreeOpen(&state->mergeTree, 1, *state->pDataFReader, state->suid, state->uid, - &(STimeWindow){.skey = state->lastTs, .ekey = TSKEY_MAX}, - &(SVersionRange){.minVer = 0, .maxVer = UINT64_MAX}, state->pLoadInfo, false, NULL, true, - state->pDataIter); - state->pMergeTree = &state->mergeTree; - state->state = SFSLASTNEXTROW_BLOCKROW; - } - case SFSLASTNEXTROW_BLOCKROW: { - if (nCols != state->pLoadInfo->numOfCols) { - for (int i = 0; i < state->pLoadInfo->numOfStt; ++i) { - state->pLoadInfo[i].numOfCols = nCols; - state->pLoadInfo[i].checkRemainingRow = state->checkRemainingRow; + bool newTable = false; + if (uid < record.uid) { + while (j < numOfTables && uidList[j] < record.uid) { + ++j; + newTable = true; } - } - bool hasVal = tMergeTreeNext(&state->mergeTree); - if (!hasVal) { - if (tMergeTreeIgnoreEarlierTs(&state->mergeTree)) { - *pIgnoreEarlierTs = true; - *ppRow = NULL; - return code; + + if (j >= numOfTables) { + finished = true; + break; 
} - state->state = SFSLASTNEXTROW_FILESET; - goto _next_fileset; + + uid = uidList[j]; } - state->row = *tMergeTreeGetRow(&state->mergeTree); - *ppRow = &state->row; - if (TSDBROW_TS(&state->row) <= state->lastTs) { - *pIgnoreEarlierTs = true; - *ppRow = NULL; - return code; + if (record.uid < uid) { + continue; } - *pIgnoreEarlierTs = false; - /* - if (!hasVal) { - state->state = SFSLASTNEXTROW_FILESET; + if (newTable) { + pInfo = getTableLoadInfo(pReader, uid); + if (pInfo->pTombData == NULL) { + pInfo->pTombData = taosArrayInit(4, sizeof(SDelData)); + } } - */ - if (!state->checkRemainingRow) { - state->checkRemainingRow = true; + + if (record.version <= pReader->info.verRange.maxVer) { + tsdbError("tomb xx load/cache: vgId:%d fid:%d commit %" PRId64 "~%" PRId64 "~%" PRId64 " tomb records", + TD_VID(pReader->pTsdb->pVnode), pReader->pCurFileSet->fid, record.skey, record.ekey, uid); + + SDelData delData = {.version = record.version, .sKey = record.skey, .eKey = record.ekey}; + taosArrayPush(pInfo->pTombData, &delData); } + } + + tTombBlockDestroy(&block); + + if (finished) { return code; } - default: - ASSERT(0); - break; } -_err: - /*if (state->pDataFReader) { - tsdbDataFReaderClose(&state->pDataFReader); - state->pDataFReader = NULL; - }*/ - if (state->pMergeTree != NULL) { - tMergeTreeClose(state->pMergeTree); - state->pMergeTree = NULL; - } + return TSDB_CODE_SUCCESS; +} - *ppRow = NULL; +static int32_t loadDataTomb(SCacheRowsReader *pReader, SDataFileReader *pFileReader) { + int32_t code = 0; - return code; + const TTombBlkArray *pBlkArray = NULL; + code = tsdbDataFileReadTombBlk(pFileReader, &pBlkArray); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + return loadTombFromBlk(pBlkArray, pReader, pFileReader, true); } -int32_t clearNextRowFromFSLast(void *iter) { - SFSLastNextRowIter *state = (SFSLastNextRowIter *)iter; - int32_t code = 0; +static int32_t loadSttTomb(STsdbReader *pTsdbReader, SSttFileReader *pSttFileReader, SSttBlockLoadInfo 
*pLoadInfo) { + int32_t code = 0; + + SCacheRowsReader *pReader = (SCacheRowsReader *)pTsdbReader; - if (!state) { + const TTombBlkArray *pBlkArray = NULL; + code = tsdbSttFileReadTombBlk(pSttFileReader, &pBlkArray); + if (code != TSDB_CODE_SUCCESS) { return code; } - /* - if (state->pDataFReader) { - tsdbDataFReaderClose(&state->pDataFReader); - state->pDataFReader = NULL; + + return loadTombFromBlk(pBlkArray, pReader, pSttFileReader, false); +} + +typedef struct { + SMergeTree mergeTree; + SMergeTree *pMergeTree; +} SFSLastIter; + +static int32_t lastIterOpen(SFSLastIter *iter, STFileSet *pFileSet, STsdb *pTsdb, STSchema *pTSchema, tb_uid_t suid, + tb_uid_t uid, SCacheRowsReader *pr, int64_t lastTs, int16_t *aCols, int nCols) { + int32_t code = 0; + + int64_t loadBlocks = 0; + double elapse = 0; + pr->pLDataIterArray = destroySttBlockReader(pr->pLDataIterArray, &loadBlocks, &elapse); + pr->pLDataIterArray = taosArrayInit(4, POINTER_BYTES); + + SMergeTreeConf conf = { + .uid = uid, + .suid = suid, + .pTsdb = pTsdb, + .timewindow = (STimeWindow){.skey = lastTs, .ekey = TSKEY_MAX}, + .verRange = (SVersionRange){.minVer = 0, .maxVer = UINT64_MAX}, + .strictTimeRange = false, + .pSchema = pTSchema, + .pCurrentFileset = pFileSet, + .backward = 1, + .pSttFileBlockIterArray = pr->pLDataIterArray, + .pCols = aCols, + .numOfCols = nCols, + .loadTombFn = loadSttTomb, + .pReader = pr, + .idstr = pr->idstr, + }; + + code = tMergeTreeOpen2(&iter->mergeTree, &conf); + if (code != TSDB_CODE_SUCCESS) { + return -1; + } + + iter->pMergeTree = &iter->mergeTree; + + return code; +} + +static int32_t lastIterClose(SFSLastIter **iter) { + int32_t code = 0; + + if ((*iter)->pMergeTree) { + tMergeTreeClose((*iter)->pMergeTree); + (*iter)->pMergeTree = NULL; } - */ - if (state->pMergeTree != NULL) { - tMergeTreeClose(state->pMergeTree); - state->pMergeTree = NULL; + + *iter = NULL; + + return code; +} + +static int32_t lastIterNext(SFSLastIter *iter, TSDBROW **ppRow) { + int32_t code = 
0; + + bool hasVal = tMergeTreeNext(iter->pMergeTree); + if (!hasVal) { + *ppRow = NULL; + return code; } + *ppRow = tMergeTreeGetRow(iter->pMergeTree); + return code; } typedef enum SFSNEXTROWSTATES { SFSNEXTROW_FS, SFSNEXTROW_FILESET, + SFSNEXTROW_INDEXLIST, + SFSNEXTROW_BRINBLOCK, + SFSNEXTROW_BRINRECORD, SFSNEXTROW_BLOCKDATA, - SFSNEXTROW_BLOCKROW + SFSNEXTROW_BLOCKROW, + SFSNEXTROW_NEXTSTTROW } SFSNEXTROWSTATES; +struct CacheNextRowIter; + typedef struct SFSNextRowIter { - SFSNEXTROWSTATES state; // [input] - STsdb *pTsdb; // [input] - SBlockIdx *pBlockIdxExp; // [input] - STSchema *pTSchema; // [input] - tb_uid_t suid; - tb_uid_t uid; - int32_t nFileSet; - int32_t iFileSet; - SArray *aDFileSet; - SDataFReader **pDataFReader; - SArray *aBlockIdx; - LRUHandle *aBlockIdxHandle; - SBlockIdx *pBlockIdx; - SMapData blockMap; - int32_t nBlock; - int32_t iBlock; - SDataBlk block; - SBlockData blockData; - SBlockData *pBlockData; - int32_t nRow; - int32_t iRow; - TSDBROW row; - SSttBlockLoadInfo *pLoadInfo; - int64_t lastTs; + SFSNEXTROWSTATES state; // [input] + SBlockIdx *pBlockIdxExp; // [input] + STSchema *pTSchema; // [input] + tb_uid_t suid; + tb_uid_t uid; + int32_t iFileSet; + STFileSet *pFileSet; + TFileSetArray *aDFileSet; + SArray *pIndexList; + int32_t iBrinIndex; + SBrinBlock brinBlock; + int32_t iBrinRecord; + SBrinRecord brinRecord; + SBlockData blockData; + SBlockData *pBlockData; + int32_t nRow; + int32_t iRow; + TSDBROW row; + int64_t lastTs; + SFSLastIter lastIter; + SFSLastIter *pLastIter; + int8_t lastEmpty; + TSDBROW *pLastRow; + SRow *pTSRow; + SRowMerger rowMerger; + SCacheRowsReader *pr; + struct CacheNextRowIter *pRowIter; } SFSNextRowIter; +static void clearLastFileSet(SFSNextRowIter *state); + static int32_t getNextRowFromFS(void *iter, TSDBROW **ppRow, bool *pIgnoreEarlierTs, bool isLast, int16_t *aCols, int nCols) { SFSNextRowIter *state = (SFSNextRowIter *)iter; int32_t code = 0; - bool checkRemainingRow = true; + STsdb *pTsdb = 
state->pr->pTsdb; - switch (state->state) { - case SFSNEXTROW_FS: - // state->aDFileSet = state->pTsdb->pFS->cState->aDFileSet; - state->nFileSet = taosArrayGetSize(state->aDFileSet); - state->iFileSet = state->nFileSet; - - state->pBlockData = NULL; - - case SFSNEXTROW_FILESET: { - SDFileSet *pFileSet = NULL; - _next_fileset: - if (--state->iFileSet >= 0) { - pFileSet = (SDFileSet *)taosArrayGet(state->aDFileSet, state->iFileSet); - } else { - // tBlockDataDestroy(&state->blockData, 1); - if (state->pBlockData) { - tBlockDataDestroy(state->pBlockData); - state->pBlockData = NULL; - } + if (SFSNEXTROW_FS == state->state) { + state->iFileSet = TARRAY2_SIZE(state->aDFileSet); - *ppRow = NULL; - return code; - } + state->state = SFSNEXTROW_FILESET; + } - if (*state->pDataFReader == NULL || (*state->pDataFReader)->pSet->fid != pFileSet->fid) { - if (*state->pDataFReader != NULL) { - tsdbDataFReaderClose(state->pDataFReader); + if (SFSNEXTROW_FILESET == state->state) { + _next_fileset: + if (--state->iFileSet < 0) { + clearLastFileSet(state); - // resetLastBlockLoadInfo(state->pLoadInfo); + *ppRow = NULL; + return code; + } else { + state->pFileSet = TARRAY2_GET(state->aDFileSet, state->iFileSet); + } + + STFileObj **pFileObj = state->pFileSet->farr; + if (pFileObj[0] != NULL || pFileObj[3] != NULL) { + if (state->pFileSet != state->pr->pCurFileSet) { + SDataFileReaderConfig conf = {.tsdb = pTsdb, .szPage = pTsdb->pVnode->config.tsdbPageSize}; + const char *filesName[4] = {0}; + if (pFileObj[0] != NULL) { + conf.files[0].file = *pFileObj[0]->f; + conf.files[0].exist = true; + filesName[0] = pFileObj[0]->fname; + + conf.files[1].file = *pFileObj[1]->f; + conf.files[1].exist = true; + filesName[1] = pFileObj[1]->fname; + + conf.files[2].file = *pFileObj[2]->f; + conf.files[2].exist = true; + filesName[2] = pFileObj[2]->fname; } - code = tsdbDataFReaderOpen(state->pDataFReader, state->pTsdb, pFileSet); - if (code) goto _err; + if (pFileObj[3] != NULL) { + 
conf.files[3].exist = true; + conf.files[3].file = *pFileObj[3]->f; + filesName[3] = pFileObj[3]->fname; + } + + code = tsdbDataFileReaderOpen(filesName, &conf, &state->pr->pFileReader); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } + + state->pr->pCurFileSet = state->pFileSet; + + loadDataTomb(state->pr, state->pr->pFileReader); + + int32_t code = tsdbDataFileReadBrinBlk(state->pr->pFileReader, &state->pr->pBlkArray); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } } - // tMapDataReset(&state->blockIdxMap); - /* - if (!state->aBlockIdx) { - state->aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx)); + if (!state->pIndexList) { + state->pIndexList = taosArrayInit(1, sizeof(SBrinBlk)); } else { - taosArrayClear(state->aBlockIdx); - } - code = tsdbReadBlockIdx(*state->pDataFReader, state->aBlockIdx); - if (code) goto _err; - */ - int32_t code = tsdbCacheGetBlockIdx(state->pTsdb->biCache, *state->pDataFReader, &state->aBlockIdxHandle); - if (code != TSDB_CODE_SUCCESS || state->aBlockIdxHandle == NULL) { - goto _err; + taosArrayClear(state->pIndexList); } - state->aBlockIdx = (SArray *)taosLRUCacheValue(state->pTsdb->biCache, state->aBlockIdxHandle); - - /* if (state->pBlockIdx) { */ - /* } */ - /* code = tMapDataSearch(&state->blockIdxMap, state->pBlockIdxExp, tGetBlockIdx, tCmprBlockIdx, - * &state->blockIdx); - */ - state->pBlockIdx = taosArraySearch(state->aBlockIdx, state->pBlockIdxExp, tCmprBlockIdx, TD_EQ); - if (!state->pBlockIdx) { - tsdbBICacheRelease(state->pTsdb->biCache, state->aBlockIdxHandle); - - state->aBlockIdxHandle = NULL; - state->aBlockIdx = NULL; - /* - tsdbDataFReaderClose(state->pDataFReader); - *state->pDataFReader = NULL; - resetLastBlockLoadInfo(state->pLoadInfo);*/ - goto _next_fileset; + + const TBrinBlkArray *pBlkArray = state->pr->pBlkArray; + + for (int i = TARRAY2_SIZE(pBlkArray) - 1; i >= 0; --i) { + SBrinBlk *pBrinBlk = &pBlkArray->data[i]; + if (state->suid >= pBrinBlk->minTbid.suid && state->suid <= pBrinBlk->maxTbid.suid) { 
+ if (state->uid >= pBrinBlk->minTbid.uid && state->uid <= pBrinBlk->maxTbid.uid) { + taosArrayPush(state->pIndexList, pBrinBlk); + } + } else if (state->suid > pBrinBlk->maxTbid.suid || + (state->suid == pBrinBlk->maxTbid.suid && state->uid > pBrinBlk->maxTbid.uid)) { + break; + } } - tMapDataReset(&state->blockMap); - /* - if (state->blockMap.pData != NULL) { - tMapDataClear(&state->blockMap); + int indexSize = TARRAY_SIZE(state->pIndexList); + if (indexSize <= 0) { + goto _check_stt_data; } - */ - code = tsdbReadDataBlk(*state->pDataFReader, state->pBlockIdx, &state->blockMap); - if (code) goto _err; - state->nBlock = state->blockMap.nItem; - state->iBlock = state->nBlock - 1; + state->state = SFSNEXTROW_INDEXLIST; + state->iBrinIndex = indexSize; + } - if (!state->pBlockData) { - state->pBlockData = &state->blockData; + _check_stt_data: + if (state->pFileSet != state->pr->pCurFileSet) { + state->pr->pCurFileSet = state->pFileSet; + } - code = tBlockDataCreate(&state->blockData); - if (code) goto _err; - } + code = lastIterOpen(&state->lastIter, state->pFileSet, pTsdb, state->pTSchema, state->suid, state->uid, state->pr, + state->lastTs, aCols, nCols); + if (code != TSDB_CODE_SUCCESS) { + goto _err; } - case SFSNEXTROW_BLOCKDATA: - _next_datablock: - if (state->iBlock >= 0) { - SDataBlk block = {0}; - bool skipBlock = true; - int inputColIndex = 0; - tDataBlkReset(&block); - tBlockDataReset(state->pBlockData); + code = lastIterNext(&state->lastIter, &state->pLastRow); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } - tMapDataGetItemByIdx(&state->blockMap, state->iBlock, &block, tGetDataBlk); - if (block.maxKey.ts <= state->lastTs) { - *pIgnoreEarlierTs = true; + if (!state->pLastRow) { + state->lastEmpty = 1; - tBlockDataDestroy(state->pBlockData); - state->pBlockData = NULL; + if (SFSNEXTROW_INDEXLIST != state->state) { + clearLastFileSet(state); + goto _next_fileset; + } + } else { + state->lastEmpty = 0; - *ppRow = NULL; - return code; - } - 
*pIgnoreEarlierTs = false; - tBlockDataReset(state->pBlockData); - TABLEID tid = {.suid = state->suid, .uid = state->uid}; - int nTmpCols = nCols; - bool hasTs = false; - if (aCols[0] == PRIMARYKEY_TIMESTAMP_COL_ID) { - --nTmpCols; - skipBlock = false; - hasTs = true; - } - code = tBlockDataInit(state->pBlockData, &tid, state->pTSchema, hasTs ? aCols + 1 : aCols, nTmpCols); - if (code) goto _err; + if (SFSNEXTROW_INDEXLIST != state->state) { + state->state = SFSNEXTROW_NEXTSTTROW; - code = tsdbReadDataBlock(*state->pDataFReader, &block, state->pBlockData); - if (code) goto _err; + *ppRow = state->pLastRow; + state->pLastRow = NULL; + return code; + } + } - for (int colIndex = 0; colIndex < state->pBlockData->nColData; ++colIndex) { - SColData *pColData = &state->pBlockData->aColData[colIndex]; + state->pLastIter = &state->lastIter; + } - if (isLast && (pColData->flag & HAS_VALUE)) { - skipBlock = false; - break; - } /*else if (pColData->flag & (HAS_VALUE | HAS_NULL)) { - skipBlock = false; - break; - }*/ - } + if (SFSNEXTROW_NEXTSTTROW == state->state) { + code = lastIterNext(&state->lastIter, &state->pLastRow); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } - if (!isLast) { - skipBlock = false; - } + if (!state->pLastRow) { + if (state->pLastIter) { + lastIterClose(&state->pLastIter); + } - if (skipBlock) { - if (--state->iBlock < 0) { - tsdbDataFReaderClose(state->pDataFReader); - *state->pDataFReader = NULL; - // resetLastBlockLoadInfo(state->pLoadInfo); + clearLastFileSet(state); + state->state = SFSNEXTROW_FILESET; + goto _next_fileset; + } else { + *ppRow = state->pLastRow; + state->pLastRow = NULL; + return code; + } + } - if (state->aBlockIdx) { - // taosArrayDestroy(state->aBlockIdx); - tsdbBICacheRelease(state->pTsdb->biCache, state->aBlockIdxHandle); + if (SFSNEXTROW_INDEXLIST == state->state) { + SBrinBlk *pBrinBlk = NULL; + _next_brinindex: + if (--state->iBrinIndex < 0) { + if (state->pLastRow) { + state->state = SFSNEXTROW_NEXTSTTROW; + *ppRow = 
state->pLastRow; + state->pLastRow = NULL; + return code; + } - state->aBlockIdxHandle = NULL; - state->aBlockIdx = NULL; - } + clearLastFileSet(state); + goto _next_fileset; + } else { + pBrinBlk = taosArrayGet(state->pIndexList, state->iBrinIndex); + } - state->state = SFSNEXTROW_FILESET; - goto _next_fileset; - } else { - goto _next_datablock; - } - } + code = tsdbDataFileReadBrinBlock(state->pr->pFileReader, pBrinBlk, &state->brinBlock); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } - state->nRow = state->blockData.nRow; - state->iRow = state->nRow - 1; + state->iBrinRecord = BRIN_BLOCK_SIZE(&state->brinBlock) - 1; + state->state = SFSNEXTROW_BRINBLOCK; + } - state->state = SFSNEXTROW_BLOCKROW; - checkRemainingRow = false; - } - case SFSNEXTROW_BLOCKROW: { - if (checkRemainingRow) { - bool skipBlock = true; - int inputColIndex = 0; - if (aCols[0] == PRIMARYKEY_TIMESTAMP_COL_ID) { - ++inputColIndex; - } - for (int colIndex = 0; colIndex < state->pBlockData->nColData; ++colIndex) { - SColData *pColData = &state->pBlockData->aColData[colIndex]; - int16_t cid = pColData->cid; - - if (inputColIndex < nCols && cid == aCols[inputColIndex]) { - if (isLast && (pColData->flag & HAS_VALUE)) { - skipBlock = false; - break; - } /*else if (pColData->flag & (HAS_VALUE | HAS_NULL)) { - skipBlock = false; - break; - }*/ - - ++inputColIndex; - } - } + if (SFSNEXTROW_BRINBLOCK == state->state) { + _next_brinrecord: + if (state->iBrinRecord < 0) { // empty brin block, goto _next_brinindex + tBrinBlockClear(&state->brinBlock); + goto _next_brinindex; + } + code = tBrinBlockGet(&state->brinBlock, state->iBrinRecord, &state->brinRecord); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } - if (!isLast) { - skipBlock = false; - } + SBrinRecord *pRecord = &state->brinRecord; + if (pRecord->uid != state->uid) { + // TODO: goto next brin block early + --state->iBrinRecord; + goto _next_brinrecord; + } - if (skipBlock) { - if (--state->iBlock < 0) { - 
tsdbDataFReaderClose(state->pDataFReader); - *state->pDataFReader = NULL; - // resetLastBlockLoadInfo(state->pLoadInfo); + state->state = SFSNEXTROW_BRINRECORD; + } - if (state->aBlockIdx) { - // taosArrayDestroy(state->aBlockIdx); - tsdbBICacheRelease(state->pTsdb->biCache, state->aBlockIdxHandle); + if (SFSNEXTROW_BRINRECORD == state->state) { + SBrinRecord *pRecord = &state->brinRecord; - state->aBlockIdxHandle = NULL; - state->aBlockIdx = NULL; - } + if (!state->pBlockData) { + state->pBlockData = &state->blockData; + code = tBlockDataCreate(&state->blockData); + if (code) goto _err; + } else { + tBlockDataReset(state->pBlockData); + } - state->state = SFSNEXTROW_FILESET; - goto _next_fileset; - } else { - goto _next_datablock; - } - } - } + if (aCols[0] == PRIMARYKEY_TIMESTAMP_COL_ID) { + --nCols; + ++aCols; + } + code = tsdbDataFileReadBlockDataByColumn(state->pr->pFileReader, pRecord, state->pBlockData, state->pTSchema, aCols, + nCols); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } - if (state->iRow >= 0) { - state->row = tsdbRowFromBlockData(state->pBlockData, state->iRow); - *ppRow = &state->row; + state->nRow = state->blockData.nRow; + state->iRow = state->nRow - 1; - if (--state->iRow < 0) { - state->state = SFSNEXTROW_BLOCKDATA; - if (--state->iBlock < 0) { - tsdbDataFReaderClose(state->pDataFReader); - *state->pDataFReader = NULL; - // resetLastBlockLoadInfo(state->pLoadInfo); + state->state = SFSNEXTROW_BLOCKROW; + } - if (state->aBlockIdx) { - // taosArrayDestroy(state->aBlockIdx); - tsdbBICacheRelease(state->pTsdb->biCache, state->aBlockIdxHandle); + if (SFSNEXTROW_BLOCKROW == state->state) { + if (state->iRow < 0) { + --state->iBrinRecord; + goto _next_brinrecord; + } - state->aBlockIdxHandle = NULL; - state->aBlockIdx = NULL; - } + state->row = tsdbRowFromBlockData(state->pBlockData, state->iRow); + if (!state->pLastIter) { + *ppRow = &state->row; + --state->iRow; + return code; + } - state->state = SFSNEXTROW_FILESET; - } - } + if 
(!state->pLastRow) { + // get next row from fslast and process with fs row, --state->Row if select fs row + code = lastIterNext(&state->lastIter, &state->pLastRow); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } + } + + if (!state->pLastRow) { + if (state->pLastIter) { + lastIterClose(&state->pLastIter); } + *ppRow = &state->row; + --state->iRow; return code; } - default: - ASSERT(0); - break; - } -_err: - /* - if (*state->pDataFReader) { - tsdbDataFReaderClose(state->pDataFReader); - *state->pDataFReader = NULL; - resetLastBlockLoadInfo(state->pLoadInfo); - }*/ - if (state->aBlockIdx) { - // taosArrayDestroy(state->aBlockIdx); - tsdbBICacheRelease(state->pTsdb->biCache, state->aBlockIdxHandle); - - state->aBlockIdxHandle = NULL; - state->aBlockIdx = NULL; - } - if (state->pBlockData) { - tBlockDataDestroy(state->pBlockData); - state->pBlockData = NULL; - } + // process state->pLastRow & state->row + TSKEY rowTs = TSDBROW_TS(&state->row); + TSKEY lastRowTs = TSDBROW_TS(state->pLastRow); + if (lastRowTs > rowTs) { + *ppRow = state->pLastRow; + state->pLastRow = NULL; + return code; + } else if (lastRowTs < rowTs) { + *ppRow = &state->row; + --state->iRow; + return code; + } else { + // TODO: merge rows and *ppRow = mergedRow + SRowMerger *pMerger = &state->rowMerger; + tsdbRowMergerInit(pMerger, state->pTSchema); - *ppRow = NULL; + code = tsdbRowMergerAdd(pMerger, &state->row, state->pTSchema); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } + code = tsdbRowMergerAdd(pMerger, state->pLastRow, state->pTSchema); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } - return code; -} + if (state->pTSRow) { + taosMemoryFree(state->pTSRow); + state->pTSRow = NULL; + } -int32_t clearNextRowFromFS(void *iter) { - int32_t code = 0; + code = tsdbRowMergerGetRow(pMerger, &state->pTSRow); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } - SFSNextRowIter *state = (SFSNextRowIter *)iter; - if (!state) { - return code; - } - /* - if (state->pDataFReader) { - 
tsdbDataFReaderClose(&state->pDataFReader); - state->pDataFReader = NULL; - }*/ - if (state->aBlockIdx) { - // taosArrayDestroy(state->aBlockIdx); - tsdbBICacheRelease(state->pTsdb->biCache, state->aBlockIdxHandle); + state->row = tsdbRowFromTSRow(TSDBROW_VERSION(&state->row), state->pTSRow); + *ppRow = &state->row; + --state->iRow; - state->aBlockIdxHandle = NULL; - state->aBlockIdx = NULL; - } - if (state->pBlockData) { - // tBlockDataDestroy(&state->blockData, 1); - tBlockDataDestroy(state->pBlockData); - state->pBlockData = NULL; - } + tsdbRowMergerClear(pMerger); - if (state->blockMap.pData != NULL) { - tMapDataClear(&state->blockMap); + return code; + } } +_err: + clearLastFileSet(state); + + *ppRow = NULL; + return code; } @@ -2229,8 +2185,6 @@ typedef struct SMemNextRowIter { STbData *pMem; // [input] STbDataIter iter; // mem buffer skip list iterator int64_t lastTs; - // bool iterOpened; - // TSDBROW *curRow; } SMemNextRowIter; static int32_t getNextRowFromMem(void *iter, TSDBROW **ppRow, bool *pIgnoreEarlierTs, bool isLast, int16_t *aCols, @@ -2281,45 +2235,6 @@ _err: return code; } -/* static int32_t tsRowFromTsdbRow(STSchema *pTSchema, TSDBROW *pRow, STSRow **ppRow) { */ -/* int32_t code = 0; */ - -/* SColVal *pColVal = &(SColVal){0}; */ - -/* if (pRow->type == 0) { */ -/* *ppRow = tdRowDup(pRow->pTSRow); */ -/* } else { */ -/* SArray *pArray = taosArrayInit(pTSchema->numOfCols, sizeof(SColVal)); */ -/* if (pArray == NULL) { */ -/* code = TSDB_CODE_OUT_OF_MEMORY; */ -/* goto _exit; */ -/* } */ - -/* TSDBKEY key = TSDBROW_KEY(pRow); */ -/* STColumn *pTColumn = &pTSchema->columns[0]; */ -/* *pColVal = COL_VAL_VALUE(pTColumn->colId, pTColumn->type, (SValue){.ts = key.ts}); */ - -/* if (taosArrayPush(pArray, pColVal) == NULL) { */ -/* code = TSDB_CODE_OUT_OF_MEMORY; */ -/* goto _exit; */ -/* } */ - -/* for (int16_t iCol = 1; iCol < pTSchema->numOfCols; iCol++) { */ -/* tsdbRowGetColVal(pRow, pTSchema, iCol, pColVal); */ -/* if (taosArrayPush(pArray, 
pColVal) == NULL) { */ -/* code = TSDB_CODE_OUT_OF_MEMORY; */ -/* goto _exit; */ -/* } */ -/* } */ - -/* code = tdSTSRowNew(pArray, pTSchema, ppRow); */ -/* if (code) goto _exit; */ -/* } */ - -/* _exit: */ -/* return code; */ -/* } */ - static bool tsdbKeyDeleted(TSDBKEY *key, SArray *pSkyline, int64_t *iSkyline) { bool deleted = false; while (*iSkyline > 0) { @@ -2365,103 +2280,121 @@ typedef struct { _next_row_clear_fn_t nextRowClearFn; } TsdbNextRowState; -typedef struct { - SArray *pSkyline; - int64_t iSkyline; - - SBlockIdx idx; - SMemNextRowIter memState; - SMemNextRowIter imemState; - SFSLastNextRowIter fsLastState; - SFSNextRowIter fsState; - TSDBROW memRow, imemRow, fsLastRow, fsRow; - - TsdbNextRowState input[4]; - STsdb *pTsdb; +typedef struct CacheNextRowIter { + SArray *pMemDelData; + SArray *pSkyline; + int64_t iSkyline; + SBlockIdx idx; + SMemNextRowIter memState; + SMemNextRowIter imemState; + SFSNextRowIter fsState; + TSDBROW memRow, imemRow, fsLastRow, fsRow; + TsdbNextRowState input[3]; + SCacheRowsReader *pr; + STsdb *pTsdb; } CacheNextRowIter; -static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTsdb, STSchema *pTSchema, tb_uid_t suid, - SSttBlockLoadInfo *pLoadInfo, SLDataIter *pLDataIter, STsdbReadSnap *pReadSnap, - SDataFReader **pDataFReader, SDataFReader **pDataFReaderLast, int64_t lastTs) { - int code = 0; +int32_t clearNextRowFromFS(void *iter) { + int32_t code = 0; - STbData *pMem = NULL; - if (pReadSnap->pMem) { - pMem = tsdbGetTbDataFromMemTable(pReadSnap->pMem, suid, uid); + SFSNextRowIter *state = (SFSNextRowIter *)iter; + if (!state) { + return code; } - STbData *pIMem = NULL; - if (pReadSnap->pIMem) { - pIMem = tsdbGetTbDataFromMemTable(pReadSnap->pIMem, suid, uid); + if (state->pLastIter) { + lastIterClose(&state->pLastIter); } - pIter->pTsdb = pTsdb; + if (state->pBlockData) { + tBlockDataDestroy(state->pBlockData); + state->pBlockData = NULL; + } - pIter->pSkyline = taosArrayInit(32, sizeof(TSDBKEY)); 
+ if (state->pTSRow) { + taosMemoryFree(state->pTSRow); + state->pTSRow = NULL; + } - SDelFile *pDelFile = pReadSnap->fs.pDelFile; - if (pDelFile) { - SDelFReader *pDelFReader; + if (state->pRowIter->pSkyline) { + taosArrayDestroy(state->pRowIter->pSkyline); + state->pRowIter->pSkyline = NULL; + } - code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb); - if (code) goto _err; + return code; +} - SArray *pDelIdxArray = taosArrayInit(32, sizeof(SDelIdx)); +static void clearLastFileSet(SFSNextRowIter *state) { + if (state->pLastIter) { + lastIterClose(&state->pLastIter); + } - code = tsdbReadDelIdx(pDelFReader, pDelIdxArray); - if (code) { - taosArrayDestroy(pDelIdxArray); - tsdbDelFReaderClose(&pDelFReader); - goto _err; - } + if (state->pBlockData) { + tBlockDataDestroy(state->pBlockData); + state->pBlockData = NULL; + } - SDelIdx *delIdx = taosArraySearch(pDelIdxArray, &(SDelIdx){.suid = suid, .uid = uid}, tCmprDelIdx, TD_EQ); + if (state->pr->pFileReader) { + tsdbDataFileReaderClose(&state->pr->pFileReader); + state->pr->pFileReader = NULL; - code = getTableDelSkyline(pMem, pIMem, pDelFReader, delIdx, pIter->pSkyline); - if (code) { - taosArrayDestroy(pDelIdxArray); - tsdbDelFReaderClose(&pDelFReader); - goto _err; + state->pr->pCurFileSet = NULL; + } + + if (state->pTSRow) { + taosMemoryFree(state->pTSRow); + state->pTSRow = NULL; + } + + if (state->pRowIter->pSkyline) { + taosArrayDestroy(state->pRowIter->pSkyline); + state->pRowIter->pSkyline = NULL; + + void *pe = NULL; + int32_t iter = 0; + while ((pe = tSimpleHashIterate(state->pr->pTableMap, pe, &iter)) != NULL) { + STableLoadInfo *pInfo = *(STableLoadInfo **)pe; + pInfo->pTombData = taosArrayDestroy(pInfo->pTombData); } + } +} - taosArrayDestroy(pDelIdxArray); - tsdbDelFReaderClose(&pDelFReader); - } else { - code = getTableDelSkyline(pMem, pIMem, NULL, NULL, pIter->pSkyline); - if (code) goto _err; +static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTsdb, STSchema *pTSchema, 
tb_uid_t suid, + SArray *pLDataIterArray, STsdbReadSnap *pReadSnap, int64_t lastTs, + SCacheRowsReader *pr) { + int code = 0; + + STbData *pMem = NULL; + if (pReadSnap->pMem) { + pMem = tsdbGetTbDataFromMemTable(pReadSnap->pMem, suid, uid); + } + + STbData *pIMem = NULL; + if (pReadSnap->pIMem) { + pIMem = tsdbGetTbDataFromMemTable(pReadSnap->pIMem, suid, uid); } - pIter->iSkyline = taosArrayGetSize(pIter->pSkyline) - 1; + pIter->pTsdb = pTsdb; - pIter->idx = (SBlockIdx){.suid = suid, .uid = uid}; + pIter->pMemDelData = NULL; - pIter->fsLastState.state = (SFSLASTNEXTROWSTATES)SFSNEXTROW_FS; - pIter->fsLastState.pTsdb = pTsdb; - pIter->fsLastState.aDFileSet = pReadSnap->fs.aDFileSet; - pIter->fsLastState.pTSchema = pTSchema; - pIter->fsLastState.suid = suid; - pIter->fsLastState.uid = uid; - pIter->fsLastState.pLoadInfo = pLoadInfo; - pIter->fsLastState.pDataFReader = pDataFReaderLast; - pIter->fsLastState.lastTs = lastTs; - pIter->fsLastState.pDataIter = pLDataIter; + loadMemTombData(&pIter->pMemDelData, pMem, pIMem, pr->info.verRange.maxVer); + pIter->idx = (SBlockIdx){.suid = suid, .uid = uid}; + + pIter->fsState.pRowIter = pIter; pIter->fsState.state = SFSNEXTROW_FS; - pIter->fsState.pTsdb = pTsdb; - pIter->fsState.aDFileSet = pReadSnap->fs.aDFileSet; + pIter->fsState.aDFileSet = pReadSnap->pfSetArray; pIter->fsState.pBlockIdxExp = &pIter->idx; pIter->fsState.pTSchema = pTSchema; pIter->fsState.suid = suid; pIter->fsState.uid = uid; - pIter->fsState.pLoadInfo = pLoadInfo; - pIter->fsState.pDataFReader = pDataFReader; pIter->fsState.lastTs = lastTs; + pIter->fsState.pr = pr; pIter->input[0] = (TsdbNextRowState){&pIter->memRow, true, false, false, &pIter->memState, getNextRowFromMem, NULL}; pIter->input[1] = (TsdbNextRowState){&pIter->imemRow, true, false, false, &pIter->imemState, getNextRowFromMem, NULL}; - pIter->input[2] = (TsdbNextRowState){ - &pIter->fsLastRow, false, true, false, &pIter->fsLastState, getNextRowFromFSLast, clearNextRowFromFSLast}; - 
pIter->input[3] = + pIter->input[2] = (TsdbNextRowState){&pIter->fsRow, false, true, false, &pIter->fsState, getNextRowFromFS, clearNextRowFromFS}; if (pMem) { @@ -2480,7 +2413,7 @@ static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTs pIter->input[1].next = true; } - return code; + pIter->pr = pr; _err: return code; } @@ -2488,7 +2421,7 @@ _err: static int32_t nextRowIterClose(CacheNextRowIter *pIter) { int code = 0; - for (int i = 0; i < 4; ++i) { + for (int i = 0; i < 3; ++i) { if (pIter->input[i].nextRowClearFn) { pIter->input[i].nextRowClearFn(pIter->input[i].iter); } @@ -2498,6 +2431,10 @@ static int32_t nextRowIterClose(CacheNextRowIter *pIter) { taosArrayDestroy(pIter->pSkyline); } + if (pIter->pMemDelData) { + taosArrayDestroy(pIter->pMemDelData); + } + _err: return code; } @@ -2507,7 +2444,7 @@ static int32_t nextRowIterGet(CacheNextRowIter *pIter, TSDBROW **ppRow, bool *pI int16_t *aCols, int nCols) { int code = 0; for (;;) { - for (int i = 0; i < 4; ++i) { + for (int i = 0; i < 3; ++i) { if (pIter->input[i].next && !pIter->input[i].stop) { code = pIter->input[i].nextRowFn(pIter->input[i].iter, &pIter->input[i].pRow, &pIter->input[i].ignoreEarlierTs, isLast, aCols, nCols); @@ -2520,10 +2457,10 @@ static int32_t nextRowIterGet(CacheNextRowIter *pIter, TSDBROW **ppRow, bool *pI } } - if (pIter->input[0].stop && pIter->input[1].stop && pIter->input[2].stop && pIter->input[3].stop) { + if (pIter->input[0].stop && pIter->input[1].stop && pIter->input[2].stop) { *ppRow = NULL; - *pIgnoreEarlierTs = (pIter->input[0].ignoreEarlierTs || pIter->input[1].ignoreEarlierTs || - pIter->input[2].ignoreEarlierTs || pIter->input[3].ignoreEarlierTs); + *pIgnoreEarlierTs = + (pIter->input[0].ignoreEarlierTs || pIter->input[1].ignoreEarlierTs || pIter->input[2].ignoreEarlierTs); return code; } @@ -2533,7 +2470,7 @@ static int32_t nextRowIterGet(CacheNextRowIter *pIter, TSDBROW **ppRow, bool *pI int nMax = 0; TSKEY maxKey = TSKEY_MIN; - for (int i = 
0; i < 4; ++i) { + for (int i = 0; i < 3; ++i) { if (!pIter->input[i].stop && pIter->input[i].pRow != NULL) { TSDBKEY key = TSDBROW_KEY(pIter->input[i].pRow); @@ -2559,6 +2496,24 @@ static int32_t nextRowIterGet(CacheNextRowIter *pIter, TSDBROW **ppRow, bool *pI for (int i = 0; i < nMax; ++i) { TSDBKEY maxKey1 = TSDBROW_KEY(max[i]); + if (!pIter->pSkyline) { + pIter->pSkyline = taosArrayInit(32, sizeof(TSDBKEY)); + + uint64_t uid = pIter->idx.uid; + STableLoadInfo *pInfo = getTableLoadInfo(pIter->pr, uid); + if (pInfo->pTombData == NULL) { + pInfo->pTombData = taosArrayInit(4, sizeof(SDelData)); + } + + taosArrayAddAll(pInfo->pTombData, pIter->pMemDelData); + + size_t delSize = TARRAY_SIZE(pInfo->pTombData); + if (delSize > 0) { + code = tsdbBuildDeleteSkyline(pInfo->pTombData, 0, (int32_t)(delSize - 1), pIter->pSkyline); + } + pIter->iSkyline = taosArrayGetSize(pIter->pSkyline) - 1; + } + bool deleted = tsdbKeyDeleted(&maxKey1, pIter->pSkyline, &pIter->iSkyline); if (!deleted) { iMerge[nMerge] = iMax[i]; @@ -2629,322 +2584,7 @@ static int32_t updateTSchema(int32_t sversion, SCacheRowsReader *pReader, uint64 } taosMemoryFreeClear(pReader->pCurrSchema); - return metaGetTbTSchemaEx(pReader->pTsdb->pVnode->pMeta, pReader->suid, uid, sversion, &pReader->pCurrSchema); -} - -static int32_t mergeLastRow(tb_uid_t uid, STsdb *pTsdb, bool *dup, SArray **ppColArray, SCacheRowsReader *pr) { - STSchema *pTSchema = pr->pSchema; // metaGetTbTSchema(pTsdb->pVnode->pMeta, uid, -1, 1); - int16_t nLastCol = pTSchema->numOfCols; - int16_t iCol = 0; - int16_t noneCol = 0; - bool setNoneCol = false; - bool hasRow = false; - bool ignoreEarlierTs = false; - SArray *pColArray = NULL; - SColVal *pColVal = &(SColVal){0}; - - int32_t code = initLastColArray(pTSchema, &pColArray); - if (TSDB_CODE_SUCCESS != code) { - return code; - } - - TSKEY lastRowTs = TSKEY_MAX; - - CacheNextRowIter iter = {0}; - nextRowIterOpen(&iter, uid, pTsdb, pTSchema, pr->suid, pr->pLoadInfo, pr->pDataIter, 
pr->pReadSnap, &pr->pDataFReader, - &pr->pDataFReaderLast, pr->lastTs); - - do { - TSDBROW *pRow = NULL; - nextRowIterGet(&iter, &pRow, &ignoreEarlierTs, false, NULL, 0); - - if (!pRow) { - break; - } - - hasRow = true; - - int32_t sversion = TSDBROW_SVERSION(pRow); - if (sversion != -1) { - code = updateTSchema(sversion, pr, uid); - if (TSDB_CODE_SUCCESS != code) { - goto _err; - } - pTSchema = pr->pCurrSchema; - } - int16_t nCol = pTSchema->numOfCols; - - TSKEY rowTs = TSDBROW_TS(pRow); - - if (lastRowTs == TSKEY_MAX) { - lastRowTs = rowTs; - STColumn *pTColumn = &pTSchema->columns[0]; - - *pColVal = COL_VAL_VALUE(pTColumn->colId, pTColumn->type, (SValue){.val = lastRowTs}); - taosArraySet(pColArray, 0, &(SLastCol){.ts = lastRowTs, .colVal = *pColVal}); - - for (iCol = 1; iCol < nCol; ++iCol) { - if (iCol >= nLastCol) { - break; - } - SLastCol *pCol = taosArrayGet(pColArray, iCol); - if (pCol->colVal.cid != pTSchema->columns[iCol].colId) { - continue; - } - tsdbRowGetColVal(pRow, pTSchema, iCol, pColVal); - - *pCol = (SLastCol){.ts = lastRowTs, .colVal = *pColVal}; - if (IS_VAR_DATA_TYPE(pColVal->type) && pColVal->value.nData > 0) { - pCol->colVal.value.pData = taosMemoryMalloc(pCol->colVal.value.nData); - if (pCol->colVal.value.pData == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - memcpy(pCol->colVal.value.pData, pColVal->value.pData, pColVal->value.nData); - } - - if (COL_VAL_IS_NONE(pColVal) && !setNoneCol) { - noneCol = iCol; - setNoneCol = true; - } - } - if (!setNoneCol) { - // done, goto return pColArray - break; - } else { - continue; - } - } - - if ((rowTs < lastRowTs)) { - // done, goto return pColArray - break; - } - - // merge into pColArray - setNoneCol = false; - for (iCol = noneCol; iCol < nCol; ++iCol) { - // high version's column value - SColVal *tColVal = (SColVal *)taosArrayGet(pColArray, iCol); - - tsdbRowGetColVal(pRow, pTSchema, iCol, pColVal); - if (COL_VAL_IS_NONE(tColVal) && 
!COL_VAL_IS_NONE(pColVal)) { - SLastCol lastCol = {.ts = rowTs, .colVal = *pColVal}; - if (IS_VAR_DATA_TYPE(pColVal->type) && pColVal->value.nData > 0) { - SLastCol *pLastCol = (SLastCol *)taosArrayGet(pColArray, iCol); - taosMemoryFree(pLastCol->colVal.value.pData); - - lastCol.colVal.value.pData = taosMemoryMalloc(lastCol.colVal.value.nData); - if (lastCol.colVal.value.pData == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - memcpy(lastCol.colVal.value.pData, pColVal->value.pData, pColVal->value.nData); - } - - taosArraySet(pColArray, iCol, &lastCol); - } else if (COL_VAL_IS_NONE(tColVal) && COL_VAL_IS_NONE(pColVal) && !setNoneCol) { - noneCol = iCol; - setNoneCol = true; - } - } - } while (setNoneCol); - - // build the result ts row here - *dup = false; - // if (taosArrayGetSize(pColArray) != nCol) { - //*ppColArray = NULL; - // taosArrayDestroy(pColArray); - //} else { - if (!hasRow) { - if (ignoreEarlierTs) { - taosArrayDestroy(pColArray); - pColArray = NULL; - } else { - taosArrayClear(pColArray); - } - } - *ppColArray = pColArray; - //} - - nextRowIterClose(&iter); - // taosMemoryFreeClear(pTSchema); - return code; - -_err: - nextRowIterClose(&iter); - taosArrayDestroy(pColArray); - // taosMemoryFreeClear(pTSchema); - return code; -} - -static int32_t mergeLast(tb_uid_t uid, STsdb *pTsdb, SArray **ppLastArray, SCacheRowsReader *pr) { - STSchema *pTSchema = pr->pSchema; // metaGetTbTSchema(pTsdb->pVnode->pMeta, uid, -1, 1); - int16_t nLastCol = pTSchema->numOfCols; - int16_t noneCol = 0; - bool setNoneCol = false; - bool hasRow = false; - bool ignoreEarlierTs = false; - SArray *pColArray = NULL; - SColVal *pColVal = &(SColVal){0}; - int16_t nCols = nLastCol; - - int32_t code = initLastColArray(pTSchema, &pColArray); - if (TSDB_CODE_SUCCESS != code) { - return code; - } - SArray *aColArray = taosArrayInit(nCols, sizeof(int16_t)); - if (NULL == aColArray) { - taosArrayDestroy(pColArray); - - return 
TSDB_CODE_OUT_OF_MEMORY; - } - for (int i = 1; i < pTSchema->numOfCols; ++i) { - taosArrayPush(aColArray, &pTSchema->columns[i].colId); - } - - TSKEY lastRowTs = TSKEY_MAX; - - CacheNextRowIter iter = {0}; - nextRowIterOpen(&iter, uid, pTsdb, pTSchema, pr->suid, pr->pLoadInfo, pr->pDataIter, pr->pReadSnap, &pr->pDataFReader, - &pr->pDataFReaderLast, pr->lastTs); - - do { - TSDBROW *pRow = NULL; - nextRowIterGet(&iter, &pRow, &ignoreEarlierTs, true, TARRAY_DATA(aColArray), TARRAY_SIZE(aColArray)); - - if (!pRow) { - break; - } - - hasRow = true; - - int32_t sversion = TSDBROW_SVERSION(pRow); - if (sversion != -1) { - code = updateTSchema(sversion, pr, uid); - if (TSDB_CODE_SUCCESS != code) { - goto _err; - } - pTSchema = pr->pCurrSchema; - } - int16_t nCol = pTSchema->numOfCols; - - TSKEY rowTs = TSDBROW_TS(pRow); - - if (lastRowTs == TSKEY_MAX) { - lastRowTs = rowTs; - STColumn *pTColumn = &pTSchema->columns[0]; - - *pColVal = COL_VAL_VALUE(pTColumn->colId, pTColumn->type, (SValue){.val = lastRowTs}); - taosArraySet(pColArray, 0, &(SLastCol){.ts = lastRowTs, .colVal = *pColVal}); - - for (int16_t iCol = 1; iCol < nCol; ++iCol) { - if (iCol >= nLastCol) { - break; - } - SLastCol *pCol = taosArrayGet(pColArray, iCol); - if (pCol->colVal.cid != pTSchema->columns[iCol].colId) { - continue; - } - tsdbRowGetColVal(pRow, pTSchema, iCol, pColVal); - - *pCol = (SLastCol){.ts = lastRowTs, .colVal = *pColVal}; - if (IS_VAR_DATA_TYPE(pColVal->type) && pColVal->value.nData > 0) { - pCol->colVal.value.pData = taosMemoryMalloc(pCol->colVal.value.nData); - if (pCol->colVal.value.pData == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - memcpy(pCol->colVal.value.pData, pColVal->value.pData, pColVal->value.nData); - } - - if (!COL_VAL_IS_VALUE(pColVal)) { - if (!setNoneCol) { - noneCol = iCol; - setNoneCol = true; - } - } else { - int32_t aColIndex = taosArraySearchIdx(aColArray, &pColVal->cid, compareInt16Val, TD_EQ); - 
taosArrayRemove(aColArray, aColIndex); - } - } - if (!setNoneCol) { - // done, goto return pColArray - break; - } else { - continue; - } - } - - // merge into pColArray - setNoneCol = false; - for (int16_t iCol = noneCol; iCol < nCol; ++iCol) { - if (iCol >= nLastCol) { - break; - } - // high version's column value - SLastCol *lastColVal = (SLastCol *)taosArrayGet(pColArray, iCol); - if (lastColVal->colVal.cid != pTSchema->columns[iCol].colId) { - continue; - } - SColVal *tColVal = &lastColVal->colVal; - - tsdbRowGetColVal(pRow, pTSchema, iCol, pColVal); - if (!COL_VAL_IS_VALUE(tColVal) && COL_VAL_IS_VALUE(pColVal)) { - SLastCol lastCol = {.ts = rowTs, .colVal = *pColVal}; - if (IS_VAR_DATA_TYPE(pColVal->type) && pColVal->value.nData > 0) { - SLastCol *pLastCol = (SLastCol *)taosArrayGet(pColArray, iCol); - taosMemoryFree(pLastCol->colVal.value.pData); - - lastCol.colVal.value.pData = taosMemoryMalloc(lastCol.colVal.value.nData); - if (lastCol.colVal.value.pData == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - memcpy(lastCol.colVal.value.pData, pColVal->value.pData, pColVal->value.nData); - } - - taosArraySet(pColArray, iCol, &lastCol); - int32_t aColIndex = taosArraySearchIdx(aColArray, &lastCol.colVal.cid, compareInt16Val, TD_EQ); - taosArrayRemove(aColArray, aColIndex); - } else if (!COL_VAL_IS_VALUE(tColVal) && !COL_VAL_IS_VALUE(pColVal) && !setNoneCol) { - noneCol = iCol; - setNoneCol = true; - } - } - } while (setNoneCol); - - // if (taosArrayGetSize(pColArray) <= 0) { - //*ppLastArray = NULL; - // taosArrayDestroy(pColArray); - //} else { - if (!hasRow) { - if (ignoreEarlierTs) { - taosArrayDestroy(pColArray); - pColArray = NULL; - } else { - taosArrayClear(pColArray); - } - } - *ppLastArray = pColArray; - //} - - nextRowIterClose(&iter); - taosArrayDestroy(aColArray); - // taosMemoryFreeClear(pTSchema); - return code; - -_err: - nextRowIterClose(&iter); - // taosMemoryFreeClear(pTSchema); - *ppLastArray = 
NULL; - taosArrayDestroy(pColArray); - taosArrayDestroy(aColArray); - return code; + return metaGetTbTSchemaEx(pReader->pTsdb->pVnode->pMeta, pReader->info.suid, uid, sversion, &pReader->pCurrSchema); } static int32_t mergeLastCid(tb_uid_t uid, STsdb *pTsdb, SArray **ppLastArray, SCacheRowsReader *pr, int16_t *aCols, @@ -2976,8 +2616,7 @@ static int32_t mergeLastCid(tb_uid_t uid, STsdb *pTsdb, SArray **ppLastArray, SC TSKEY lastRowTs = TSKEY_MAX; CacheNextRowIter iter = {0}; - nextRowIterOpen(&iter, uid, pTsdb, pTSchema, pr->suid, pr->pLoadInfo, pr->pDataIter, pr->pReadSnap, &pr->pDataFReader, - &pr->pDataFReaderLast, pr->lastTs); + nextRowIterOpen(&iter, uid, pTsdb, pTSchema, pr->info.suid, pr->pLDataIterArray, pr->pReadSnap, pr->lastTs, pr); do { TSDBROW *pRow = NULL; @@ -3146,8 +2785,7 @@ static int32_t mergeLastRowCid(tb_uid_t uid, STsdb *pTsdb, SArray **ppLastArray, TSKEY lastRowTs = TSKEY_MAX; CacheNextRowIter iter = {0}; - nextRowIterOpen(&iter, uid, pTsdb, pTSchema, pr->suid, pr->pLoadInfo, pr->pDataIter, pr->pReadSnap, &pr->pDataFReader, - &pr->pDataFReaderLast, pr->lastTs); + nextRowIterOpen(&iter, uid, pTsdb, pTSchema, pr->info.suid, pr->pLDataIterArray, pr->pReadSnap, pr->lastTs, pr); do { TSDBROW *pRow = NULL; @@ -3236,92 +2874,6 @@ _err: return code; } -int32_t tsdbCacheGetLastrowH(SLRUCache *pCache, tb_uid_t uid, SCacheRowsReader *pr, LRUHandle **handle) { - int32_t code = 0; - char key[32] = {0}; - int keyLen = 0; - - // getTableCacheKeyS(uid, "lr", key, &keyLen); - getTableCacheKey(uid, 0, key, &keyLen); - LRUHandle *h = taosLRUCacheLookup(pCache, key, keyLen); - if (!h) { - STsdb *pTsdb = pr->pVnode->pTsdb; - taosThreadMutexLock(&pTsdb->lruMutex); - - h = taosLRUCacheLookup(pCache, key, keyLen); - if (!h) { - SArray *pArray = NULL; - bool dup = false; // which is always false for now - code = mergeLastRow(uid, pTsdb, &dup, &pArray, pr); - // if table's empty or error or ignore ignore earlier ts, set handle NULL and return - if (code < 0 || pArray 
== NULL) { - if (!dup && pArray) { - taosArrayDestroy(pArray); - } - - taosThreadMutexUnlock(&pTsdb->lruMutex); - - *handle = NULL; - - return 0; - } - - size_t charge = pArray->capacity * pArray->elemSize + sizeof(*pArray); - _taos_lru_deleter_t deleter = deleteTableCacheLast; - LRUStatus status = - taosLRUCacheInsert(pCache, key, keyLen, pArray, charge, deleter, &h, TAOS_LRU_PRIORITY_LOW, NULL); - if (status != TAOS_LRU_STATUS_OK) { - code = -1; - } - } - taosThreadMutexUnlock(&pTsdb->lruMutex); - } - - *handle = h; - - return code; -} - -int32_t tsdbCacheGetLastH(SLRUCache *pCache, tb_uid_t uid, SCacheRowsReader *pr, LRUHandle **handle) { - int32_t code = 0; - char key[32] = {0}; - int keyLen = 0; - - // getTableCacheKeyS(uid, "l", key, &keyLen); - getTableCacheKey(uid, 1, key, &keyLen); - LRUHandle *h = taosLRUCacheLookup(pCache, key, keyLen); - if (!h) { - STsdb *pTsdb = pr->pVnode->pTsdb; - taosThreadMutexLock(&pTsdb->lruMutex); - - h = taosLRUCacheLookup(pCache, key, keyLen); - if (!h) { - SArray *pLastArray = NULL; - code = mergeLast(uid, pTsdb, &pLastArray, pr); - // if table's empty or error or ignore ignore earlier ts, set handle NULL and return - if (code < 0 || pLastArray == NULL) { - taosThreadMutexUnlock(&pTsdb->lruMutex); - - *handle = NULL; - return 0; - } - - size_t charge = pLastArray->capacity * pLastArray->elemSize + sizeof(*pLastArray); - _taos_lru_deleter_t deleter = deleteTableCacheLast; - LRUStatus status = - taosLRUCacheInsert(pCache, key, keyLen, pLastArray, charge, deleter, &h, TAOS_LRU_PRIORITY_LOW, NULL); - if (status != TAOS_LRU_STATUS_OK) { - code = -1; - } - } - taosThreadMutexUnlock(&pTsdb->lruMutex); - } - - *handle = h; - - return code; -} - int32_t tsdbCacheRelease(SLRUCache *pCache, LRUHandle *h) { int32_t code = 0; diff --git a/source/dnode/vnode/src/tsdb/tsdbCacheRead.c b/source/dnode/vnode/src/tsdb/tsdbCacheRead.c index 6138b1f7b40184be649a78543d8958fbfe80516f..66c8cc06e24adb0bba36ec1e1af7eaa60b3f4de0 100644 --- 
a/source/dnode/vnode/src/tsdb/tsdbCacheRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbCacheRead.c @@ -17,6 +17,8 @@ #include "tarray.h" #include "tcommon.h" #include "tsdb.h" +#include "tsdbDataFileRW.h" +#include "tsdbReadUtil.h" #define HASTYPE(_type, _t) (((_type) & (_t)) == (_t)) @@ -124,7 +126,10 @@ int32_t tsdbReuseCacherowsReader(void* reader, void* pTableIdList, int32_t numOf pReader->numOfTables = numOfTables; pReader->lastTs = INT64_MIN; - resetLastBlockLoadInfo(pReader->pLoadInfo); + int64_t blocks; + double elapse; + pReader->pLDataIterArray = destroySttBlockReader(pReader->pLDataIterArray, &blocks, &elapse); + pReader->pLDataIterArray = taosArrayInit(4, POINTER_BYTES); return TSDB_CODE_SUCCESS; } @@ -140,11 +145,11 @@ int32_t tsdbCacherowsReaderOpen(void* pVnode, int32_t type, void* pTableIdList, p->type = type; p->pVnode = pVnode; p->pTsdb = p->pVnode->pTsdb; - p->verRange = (SVersionRange){.minVer = 0, .maxVer = UINT64_MAX}; + p->info.verRange = (SVersionRange){.minVer = 0, .maxVer = UINT64_MAX}; + p->info.suid = suid; p->numOfCols = numOfCols; p->pCidList = pCidList; p->pSlotIds = pSlotIds; - p->suid = suid; if (numOfTables == 0) { *pReader = p; @@ -176,20 +181,6 @@ int32_t tsdbCacherowsReaderOpen(void* pVnode, int32_t type, void* pTableIdList, } } - SVnodeCfg* pCfg = &((SVnode*)pVnode)->config; - int32_t numOfStt = pCfg->sttTrigger; - p->pLoadInfo = tCreateLastBlockLoadInfo(p->pSchema, NULL, 0, numOfStt); - if (p->pLoadInfo == NULL) { - tsdbCacherowsReaderClose(p); - return TSDB_CODE_OUT_OF_MEMORY; - } - - p->pDataIter = taosMemoryCalloc(pCfg->sttTrigger, sizeof(SLDataIter)); - if (p->pDataIter == NULL) { - tsdbCacherowsReaderClose(p); - return TSDB_CODE_OUT_OF_MEMORY; - } - p->idstr = taosStrdup(idstr); taosThreadMutexInit(&p->readerMutex, NULL); @@ -214,14 +205,36 @@ void* tsdbCacherowsReaderClose(void* pReader) { taosMemoryFree(p->pSchema); } - taosMemoryFree(p->pDataIter); taosMemoryFree(p->pCurrSchema); - destroyLastBlockLoadInfo(p->pLoadInfo); 
+ if (p->pLDataIterArray) { + int64_t loadBlocks = 0; + double elapse = 0; + destroySttBlockReader(p->pLDataIterArray, &loadBlocks, &elapse); + } + + if (p->pFileReader) { + tsdbDataFileReaderClose(&p->pFileReader); + p->pFileReader = NULL; + } taosMemoryFree((void*)p->idstr); taosThreadMutexDestroy(&p->readerMutex); + if (p->pTableMap) { + void* pe = NULL; + int32_t iter = 0; + while ((pe = tSimpleHashIterate(p->pTableMap, pe, &iter)) != NULL) { + STableLoadInfo* pInfo = *(STableLoadInfo**)pe; + pInfo->pTombData = taosArrayDestroy(pInfo->pTombData); + } + + tSimpleHashCleanup(p->pTableMap); + } + if (p->uidList) { + taosMemoryFree(p->uidList); + } + taosMemoryFree(pReader); return NULL; } @@ -264,7 +277,6 @@ int32_t tsdbRetrieveCacheRows(void* pReader, SSDataBlock* pResBlock, const int32 int32_t code = TSDB_CODE_SUCCESS; SArray* pRow = taosArrayInit(TARRAY_SIZE(pr->pCidList), sizeof(SLastCol)); bool hasRes = false; - SArray* pLastCols = NULL; void** pRes = taosMemoryCalloc(pr->numOfCols, POINTER_BYTES); if (pRes == NULL) { @@ -273,59 +285,47 @@ int32_t tsdbRetrieveCacheRows(void* pReader, SSDataBlock* pResBlock, const int32 } for (int32_t j = 0; j < pr->numOfCols; ++j) { - pRes[j] = - taosMemoryCalloc(1, sizeof(SFirstLastRes) + pr->pSchema->columns[/*-1 == slotIds[j] ? 
0 : */ slotIds[j]].bytes + - VARSTR_HEADER_SIZE); + pRes[j] = taosMemoryCalloc(1, sizeof(SFirstLastRes) + pr->pSchema->columns[slotIds[j]].bytes + VARSTR_HEADER_SIZE); SFirstLastRes* p = (SFirstLastRes*)varDataVal(pRes[j]); p->ts = INT64_MIN; } - pLastCols = taosArrayInit(pr->numOfCols, sizeof(SLastCol)); - if (pLastCols == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _end; - } - - for (int32_t i = 0; i < pr->numOfCols; ++i) { - int32_t slotId = slotIds[i]; - struct STColumn* pCol = &pr->pSchema->columns[slotId]; - SLastCol p = {.ts = INT64_MIN, .colVal.type = pCol->type, .colVal.flag = CV_FLAG_NULL}; - - if (IS_VAR_DATA_TYPE(pCol->type)) { - p.colVal.value.pData = taosMemoryCalloc(pCol->bytes, sizeof(char)); - } - taosArrayPush(pLastCols, &p); - } - taosThreadMutexLock(&pr->readerMutex); - code = tsdbTakeReadSnap((STsdbReader*)pr, tsdbCacheQueryReseek, &pr->pReadSnap); + code = tsdbTakeReadSnap2((STsdbReader*)pr, tsdbCacheQueryReseek, &pr->pReadSnap); if (code != TSDB_CODE_SUCCESS) { goto _end; } - pr->pDataFReader = NULL; - pr->pDataFReaderLast = NULL; - int8_t ltype = (pr->type & CACHESCAN_RETRIEVE_LAST) >> 3; + int8_t ltype = (pr->type & CACHESCAN_RETRIEVE_LAST) >> 3; + STableKeyInfo* pTableList = pr->pTableList; // retrieve the only one last row of all tables in the uid list. 
if (HASTYPE(pr->type, CACHESCAN_RETRIEVE_TYPE_SINGLE)) { + SArray* pLastCols = taosArrayInit(pr->numOfCols, sizeof(SLastCol)); + if (pLastCols == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _end; + } + + for (int32_t i = 0; i < pr->numOfCols; ++i) { + int32_t slotId = slotIds[i]; + struct STColumn* pCol = &pr->pSchema->columns[slotId]; + SLastCol p = {.ts = INT64_MIN, .colVal.type = pCol->type, .colVal.flag = CV_FLAG_NULL}; + + if (IS_VAR_DATA_TYPE(pCol->type)) { + p.colVal.value.pData = taosMemoryCalloc(pCol->bytes, sizeof(char)); + } + taosArrayPush(pLastCols, &p); + } + int64_t st = taosGetTimestampUs(); int64_t totalLastTs = INT64_MAX; - for (int32_t i = 0; i < pr->numOfTables; ++i) { - STableKeyInfo* pKeyInfo = &pr->pTableList[i]; + tb_uid_t uid = pTableList[i].uid; - tsdbCacheGetBatch(pr->pTsdb, pKeyInfo->uid, pRow, pr, ltype); - // tsdbCacheGet(pr->pTsdb, pKeyInfo->uid, pRow, pr, ltype); - if (TARRAY_SIZE(pRow) <= 0) { - taosArrayClearEx(pRow, freeItem); - // taosArrayClear(pRow); - continue; - } - SLastCol* pColVal = taosArrayGet(pRow, 0); - if (COL_VAL_IS_NONE(&pColVal->colVal)) { + tsdbCacheGetBatch(pr->pTsdb, uid, pRow, pr, ltype); + if (TARRAY_SIZE(pRow) <= 0 || COL_VAL_IS_NONE(&((SLastCol*)TARRAY_DATA(pRow))[0].colVal)) { taosArrayClearEx(pRow, freeItem); - // taosArrayClear(pRow); continue; } @@ -348,9 +348,9 @@ int32_t tsdbRetrieveCacheRows(void* pReader, SSDataBlock* pResBlock, const int32 p->ts = pColVal->ts; if (k == 0) { if (TARRAY_SIZE(pTableUidList) == 0) { - taosArrayPush(pTableUidList, &pKeyInfo->uid); + taosArrayPush(pTableUidList, &uid); } else { - taosArraySet(pTableUidList, 0, &pKeyInfo->uid); + taosArraySet(pTableUidList, 0, &uid); } } @@ -385,32 +385,25 @@ int32_t tsdbRetrieveCacheRows(void* pReader, SSDataBlock* pResBlock, const int32 } taosArrayClearEx(pRow, freeItem); - // taosArrayClear(pRow); } if (hasRes) { saveOneRow(pLastCols, pResBlock, pr, slotIds, dstSlotIds, pRes, pr->idstr); } + + taosArrayDestroyEx(pLastCols, 
freeItem); } else if (HASTYPE(pr->type, CACHESCAN_RETRIEVE_TYPE_ALL)) { for (int32_t i = pr->tableIndex; i < pr->numOfTables; ++i) { - tb_uid_t uid = pr->pTableList[i].uid; + tb_uid_t uid = pTableList[i].uid; tsdbCacheGetBatch(pr->pTsdb, uid, pRow, pr, ltype); - if (TARRAY_SIZE(pRow) <= 0) { - taosArrayClearEx(pRow, freeItem); - // taosArrayClear(pRow); - continue; - } - SLastCol* pColVal = (SLastCol*)taosArrayGet(pRow, 0); - if (COL_VAL_IS_NONE(&pColVal->colVal)) { + if (TARRAY_SIZE(pRow) <= 0 || COL_VAL_IS_NONE(&((SLastCol*)TARRAY_DATA(pRow))[0].colVal)) { taosArrayClearEx(pRow, freeItem); - // taosArrayClear(pRow); continue; } saveOneRow(pRow, pResBlock, pr, slotIds, dstSlotIds, pRes, pr->idstr); taosArrayClearEx(pRow, freeItem); - // taosArrayClear(pRow); taosArrayPush(pTableUidList, &uid); @@ -424,11 +417,8 @@ int32_t tsdbRetrieveCacheRows(void* pReader, SSDataBlock* pResBlock, const int32 } _end: - tsdbDataFReaderClose(&pr->pDataFReaderLast); - tsdbDataFReaderClose(&pr->pDataFReader); + tsdbUntakeReadSnap2((STsdbReader*)pr, pr->pReadSnap, true); - resetLastBlockLoadInfo(pr->pLoadInfo); - tsdbUntakeReadSnap((STsdbReader*)pr, pr->pReadSnap, true); taosThreadMutexUnlock(&pr->readerMutex); if (pRes != NULL) { @@ -438,9 +428,7 @@ _end: } taosMemoryFree(pRes); - // taosArrayDestroyEx(pRow, freeItem); taosArrayDestroy(pRow); - taosArrayDestroyEx(pLastCols, freeItem); return code; } diff --git a/source/dnode/vnode/src/tsdb/tsdbCommit2.c b/source/dnode/vnode/src/tsdb/tsdbCommit2.c new file mode 100644 index 0000000000000000000000000000000000000000..0639cd91a5ad4f611cc13fbad5923a8df762832a --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbCommit2.c @@ -0,0 +1,631 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tsdbCommit2.h" + +// extern dependencies +typedef struct { + STsdb *tsdb; + TFileSetArray *fsetArr; + TFileOpArray fopArray[1]; + + // SSkmInfo skmTb[1]; + // SSkmInfo skmRow[1]; + + int32_t minutes; + int8_t precision; + int32_t minRow; + int32_t maxRow; + int8_t cmprAlg; + int32_t sttTrigger; + int32_t szPage; + int64_t compactVersion; + + struct { + int64_t cid; + int64_t now; + TSKEY nextKey; + TSKEY maxDelKey; + int32_t fid; + int32_t expLevel; + SDiskID did; + TSKEY minKey; + TSKEY maxKey; + STFileSet *fset; + TABLEID tbid[1]; + bool hasTSData; + } ctx[1]; + + // reader + SSttFileReader *sttReader; + + // iter + TTsdbIterArray dataIterArray[1]; + SIterMerger *dataIterMerger; + TTsdbIterArray tombIterArray[1]; + SIterMerger *tombIterMerger; + + // writer + SFSetWriter *writer; +} SCommitter2; + +static int32_t tsdbCommitOpenWriter(SCommitter2 *committer) { + int32_t code = 0; + int32_t lino = 0; + + SFSetWriterConfig config = { + .tsdb = committer->tsdb, + .toSttOnly = true, + .compactVersion = committer->compactVersion, + .minRow = committer->minRow, + .maxRow = committer->maxRow, + .szPage = committer->szPage, + .cmprAlg = committer->cmprAlg, + .fid = committer->ctx->fid, + .cid = committer->ctx->cid, + .did = committer->ctx->did, + .level = 0, + }; + + if (committer->sttTrigger == 1) { + config.toSttOnly = false; + + if (committer->ctx->fset) { + for (int32_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ftype++) { + if (committer->ctx->fset->farr[ftype] != NULL) { + config.files[ftype].exist = true; + config.files[ftype].file = committer->ctx->fset->farr[ftype]->f[0]; + } + } + } + } + + code = tsdbFSetWriterOpen(&config, 
&committer->writer);
+  TSDB_CHECK_CODE(code, lino, _exit);
+
+_exit:
+  if (code) {
+    TSDB_ERROR_LOG(TD_VID(committer->tsdb->pVnode), lino, code);
+  }
+  return code;
+}
+
+static int32_t tsdbCommitCloseWriter(SCommitter2 *committer) {
+  return tsdbFSetWriterClose(&committer->writer, 0, committer->fopArray);
+}
+
+static int32_t tsdbCommitTSData(SCommitter2 *committer) {
+  int32_t code = 0;
+  int32_t lino = 0;
+  int64_t numOfRow = 0;
+  SMetaInfo info;
+
+  committer->ctx->hasTSData = false;
+
+  committer->ctx->tbid->suid = 0;
+  committer->ctx->tbid->uid = 0;
+  for (SRowInfo *row; (row = tsdbIterMergerGetData(committer->dataIterMerger)) != NULL;) {
+    if (row->uid != committer->ctx->tbid->uid) {
+      committer->ctx->tbid->suid = row->suid;
+      committer->ctx->tbid->uid = row->uid;
+
+      if (metaGetInfo(committer->tsdb->pVnode->pMeta, row->uid, &info, NULL) != 0) {
+        code = tsdbIterMergerSkipTableData(committer->dataIterMerger, committer->ctx->tbid);
+        TSDB_CHECK_CODE(code, lino, _exit);
+        continue;
+      }
+    }
+
+    int64_t ts = TSDBROW_TS(&row->row);
+    if (ts > committer->ctx->maxKey) {
+      committer->ctx->nextKey = TMIN(committer->ctx->nextKey, ts);
+      code = tsdbIterMergerSkipTableData(committer->dataIterMerger, committer->ctx->tbid);
+      TSDB_CHECK_CODE(code, lino, _exit);
+      continue;
+    }
+
+    committer->ctx->hasTSData = true;
+    numOfRow++;
+
+    code = tsdbFSetWriteRow(committer->writer, row);
+    TSDB_CHECK_CODE(code, lino, _exit);
+
+    code = tsdbIterMergerNext(committer->dataIterMerger);
+    TSDB_CHECK_CODE(code, lino, _exit);
+  }
+
+_exit:
+  if (code) {
+    TSDB_ERROR_LOG(TD_VID(committer->tsdb->pVnode), lino, code);
+  } else {
+    tsdbDebug("vgId:%d fid:%d commit %" PRId64 " rows", TD_VID(committer->tsdb->pVnode), committer->ctx->fid, numOfRow);
+  }
+  return code;
+}
+
+/*
+ * Write the tomb records that overlap the file set currently being committed
+ * (ctx->minKey .. ctx->maxKey) through committer->writer.
+ *
+ * When there is no existing file set and tsdbCommitTSData() wrote no rows,
+ * nothing is written: ctx->nextKey is moved to maxKey + 1 if maxKey is still
+ * below ctx->maxDelKey, otherwise to TSKEY_MAX, and the function returns 0.
+ *
+ * Otherwise each record is clipped to the file set's key range before being
+ * written (ekey is capped at maxKey + 1 when the record extends past maxKey,
+ * in which case ctx->nextKey is lowered to maxKey + 1 as well). Records of
+ * tables that metaGetInfo() no longer finds are skipped.
+ *
+ * Returns 0 on success or the first error code encountered.
+ */
+static int32_t tsdbCommitTombData(SCommitter2 *committer) {
+  int32_t code = 0;
+  int32_t lino = 0;
+  int64_t numRecord = 0;
+  SMetaInfo info;
+
+  if (committer->ctx->fset == NULL && !committer->ctx->hasTSData) {
+    if (committer->ctx->maxKey < committer->ctx->maxDelKey) {
+      committer->ctx->nextKey = committer->ctx->maxKey + 1;
+    } else {
+      committer->ctx->nextKey = TSKEY_MAX;
+    }
+    return 0;
+  }
+
+  committer->ctx->tbid->suid = 0;
+  committer->ctx->tbid->uid = 0;
+  for (STombRecord *record; (record = tsdbIterMergerGetTombRecord(committer->tombIterMerger));) {
+    if (record->uid != committer->ctx->tbid->uid) {
+      committer->ctx->tbid->suid = record->suid;
+      committer->ctx->tbid->uid = record->uid;
+
+      if (metaGetInfo(committer->tsdb->pVnode->pMeta, record->uid, &info, NULL) != 0) {
+        // The record was read from tombIterMerger, so that is the merger that
+        // has to skip this table (the original skipped on dataIterMerger, which
+        // left the tomb records of the missing table in place).
+        code = tsdbIterMergerSkipTableData(committer->tombIterMerger, committer->ctx->tbid);
+        TSDB_CHECK_CODE(code, lino, _exit);
+        continue;
+      }
+    }
+
+    if (record->ekey < committer->ctx->minKey) {
+      goto _next;
+    } else if (record->skey > committer->ctx->maxKey) {
+      // NOTE(review): skey > maxKey in this branch, so TMIN() yields maxKey and
+      // the assignment changes nothing; ctx->nextKey may have been intended,
+      // as in tsdbCommitTSData() -- confirm before changing.
+      committer->ctx->maxKey = TMIN(record->skey, committer->ctx->maxKey);
+      goto _next;
+    }
+
+    TSKEY maxKey = committer->ctx->maxKey;
+    if (record->ekey > committer->ctx->maxKey) {
+      maxKey = committer->ctx->maxKey + 1;
+    }
+
+    if (record->ekey > committer->ctx->maxKey && committer->ctx->nextKey > maxKey) {
+      committer->ctx->nextKey = maxKey;
+    }
+
+    record->skey = TMAX(record->skey, committer->ctx->minKey);
+    record->ekey = TMIN(record->ekey, maxKey);
+
+    numRecord++;
+    code = tsdbFSetWriteTombRecord(committer->writer, record);
+    TSDB_CHECK_CODE(code, lino, _exit);
+
+  _next:
+    code = tsdbIterMergerNext(committer->tombIterMerger);
+    TSDB_CHECK_CODE(code, lino, _exit);
+  }
+
+_exit:
+  if (code) {
+    TSDB_ERROR_LOG(TD_VID(committer->tsdb->pVnode), lino, code);
+  } else {
+    tsdbDebug("vgId:%d fid:%d commit %" PRId64 " tomb records", TD_VID(committer->tsdb->pVnode), committer->ctx->fid,
+              numRecord);
+  }
+  return code;
+}
+
+static int32_t tsdbCommitOpenReader(SCommitter2 *committer) {
+  int32_t code = 0;
+  int32_t lino = 0;
+
+  ASSERT(committer->sttReader == NULL);
+
+  if (committer->ctx->fset == NULL  //
+      || committer->sttTrigger > 1  //
+      || 
TARRAY2_SIZE(committer->ctx->fset->lvlArr) == 0 // + ) { + return 0; + } + + ASSERT(TARRAY2_SIZE(committer->ctx->fset->lvlArr) == 1); + + SSttLvl *lvl = TARRAY2_FIRST(committer->ctx->fset->lvlArr); + + ASSERT(lvl->level == 0); + + if (TARRAY2_SIZE(lvl->fobjArr) == 0) { + return 0; + } + + ASSERT(TARRAY2_SIZE(lvl->fobjArr) == 1); + + STFileObj *fobj = TARRAY2_FIRST(lvl->fobjArr); + + SSttFileReaderConfig config = { + .tsdb = committer->tsdb, + .szPage = committer->szPage, + .file = fobj->f[0], + }; + code = tsdbSttFileReaderOpen(fobj->fname, &config, &committer->sttReader); + TSDB_CHECK_CODE(code, lino, _exit); + + STFileOp op = { + .optype = TSDB_FOP_REMOVE, + .fid = fobj->f->fid, + .of = fobj->f[0], + }; + + code = TARRAY2_APPEND(committer->fopArray, op); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(committer->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbCommitCloseReader(SCommitter2 *committer) { return tsdbSttFileReaderClose(&committer->sttReader); } + +static int32_t tsdbCommitOpenIter(SCommitter2 *committer) { + int32_t code = 0; + int32_t lino = 0; + + ASSERT(TARRAY2_SIZE(committer->dataIterArray) == 0); + ASSERT(committer->dataIterMerger == NULL); + ASSERT(TARRAY2_SIZE(committer->tombIterArray) == 0); + ASSERT(committer->tombIterMerger == NULL); + + STsdbIter *iter; + STsdbIterConfig config = {0}; + + // mem data iter + config.type = TSDB_ITER_TYPE_MEMT; + config.memt = committer->tsdb->imem; + config.from->ts = committer->ctx->minKey; + config.from->version = VERSION_MIN; + + code = tsdbIterOpen(&config, &iter); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND(committer->dataIterArray, iter); + TSDB_CHECK_CODE(code, lino, _exit); + + // mem tomb iter + config.type = TSDB_ITER_TYPE_MEMT_TOMB; + config.memt = committer->tsdb->imem; + + code = tsdbIterOpen(&config, &iter); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND(committer->tombIterArray, iter); + 
TSDB_CHECK_CODE(code, lino, _exit);
+
+  // STT
+  if (committer->sttReader) {
+    // data iter
+    config.type = TSDB_ITER_TYPE_STT;
+    config.sttReader = committer->sttReader;
+
+    code = tsdbIterOpen(&config, &iter);
+    TSDB_CHECK_CODE(code, lino, _exit);
+
+    code = TARRAY2_APPEND(committer->dataIterArray, iter);
+    TSDB_CHECK_CODE(code, lino, _exit);
+
+    // tomb iter
+    config.type = TSDB_ITER_TYPE_STT_TOMB;
+    config.sttReader = committer->sttReader;
+
+    code = tsdbIterOpen(&config, &iter);
+    TSDB_CHECK_CODE(code, lino, _exit);
+
+    code = TARRAY2_APPEND(committer->tombIterArray, iter);
+    TSDB_CHECK_CODE(code, lino, _exit);
+  }
+
+  // open merger
+  code = tsdbIterMergerOpen(committer->dataIterArray, &committer->dataIterMerger, false);
+  TSDB_CHECK_CODE(code, lino, _exit);
+
+  code = tsdbIterMergerOpen(committer->tombIterArray, &committer->tombIterMerger, true);
+  TSDB_CHECK_CODE(code, lino, _exit);
+
+_exit:
+  if (code) {
+    TSDB_ERROR_LOG(TD_VID(committer->tsdb->pVnode), lino, code);
+  }
+  return code;
+}
+
+static int32_t tsdbCommitCloseIter(SCommitter2 *committer) {
+  tsdbIterMergerClose(&committer->tombIterMerger);
+  tsdbIterMergerClose(&committer->dataIterMerger);
+  TARRAY2_CLEAR(committer->tombIterArray, tsdbIterClose);
+  TARRAY2_CLEAR(committer->dataIterArray, tsdbIterClose);
+  return 0;
+}
+
+/*
+ * Prepare the commit of one file set.
+ *
+ * Derives ctx->fid, ctx->expLevel and the key range ctx->minKey/ctx->maxKey
+ * from ctx->nextKey, allocates a disk for that level, and looks the fid up in
+ * the file-set snapshot (ctx->fset is NULL when no such file set exists yet).
+ * It then opens the STT reader, the iterators and the writer, and finally
+ * resets ctx->nextKey to TSKEY_MAX.
+ *
+ * Returns 0 on success or the first error code encountered. On failure
+ * ctx->nextKey is left untouched and the caller must not go on to commit data.
+ */
+static int32_t tsdbCommitFileSetBegin(SCommitter2 *committer) {
+  int32_t code = 0;
+  int32_t lino = 0;
+  STsdb *tsdb = committer->tsdb;
+
+  committer->ctx->fid = tsdbKeyFid(committer->ctx->nextKey, committer->minutes, committer->precision);
+  committer->ctx->expLevel = tsdbFidLevel(committer->ctx->fid, &tsdb->keepCfg, committer->ctx->now);
+  tsdbFidKeyRange(committer->ctx->fid, committer->minutes, committer->precision, &committer->ctx->minKey,
+                  &committer->ctx->maxKey);
+  code = tfsAllocDisk(committer->tsdb->pVnode->pTfs, committer->ctx->expLevel, &committer->ctx->did);
+  TSDB_CHECK_CODE(code, lino, _exit);
+  tfsMkdirRecurAt(committer->tsdb->pVnode->pTfs, committer->tsdb->path, committer->ctx->did);
+  // ctx->fset points at this stack object only for the duration of the search;
+  // it is replaced by the snapshot entry (or NULL) right after.
+  STFileSet fset = {.fid = committer->ctx->fid};
+  committer->ctx->fset = &fset;
+  STFileSet **fsetPtr = TARRAY2_SEARCH(committer->fsetArr, &committer->ctx->fset, tsdbTFileSetCmprFn, TD_EQ);
+  committer->ctx->fset = (fsetPtr == NULL) ? NULL : *fsetPtr;
+  committer->ctx->tbid->suid = 0;
+  committer->ctx->tbid->uid = 0;
+
+  ASSERT(TARRAY2_SIZE(committer->dataIterArray) == 0);
+  ASSERT(committer->dataIterMerger == NULL);
+  ASSERT(committer->writer == NULL);
+
+  code = tsdbCommitOpenReader(committer);
+  TSDB_CHECK_CODE(code, lino, _exit);
+
+  code = tsdbCommitOpenIter(committer);
+  TSDB_CHECK_CODE(code, lino, _exit);
+
+  code = tsdbCommitOpenWriter(committer);
+  TSDB_CHECK_CODE(code, lino, _exit);
+
+  // reset nextKey
+  committer->ctx->nextKey = TSKEY_MAX;
+
+_exit:
+  if (code) {
+    TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code);
+  } else {
+    tsdbDebug("vgId:%d %s done, fid:%d minKey:%" PRId64 " maxKey:%" PRId64 " expLevel:%d", TD_VID(tsdb->pVnode),
+              __func__, committer->ctx->fid, committer->ctx->minKey, committer->ctx->maxKey, committer->ctx->expLevel);
+  }
+  // Propagate the failure: the original returned 0 unconditionally, so
+  // tsdbCommitFileSet() went on to use iterators and a writer that were never opened.
+  return code;
+}
+
+static int32_t tsdbCommitFileSetEnd(SCommitter2 *committer) {
+  int32_t code = 0;
+  int32_t lino = 0;
+
+  code = tsdbCommitCloseWriter(committer);
+  TSDB_CHECK_CODE(code, lino, _exit);
+
+  code = tsdbCommitCloseIter(committer);
+  TSDB_CHECK_CODE(code, lino, _exit);
+
+  code = tsdbCommitCloseReader(committer);
+  TSDB_CHECK_CODE(code, lino, _exit);
+
+_exit:
+  if (code) {
+    TSDB_ERROR_LOG(TD_VID(committer->tsdb->pVnode), lino, code);
+  } else {
+    tsdbDebug("vgId:%d %s done, fid:%d", TD_VID(committer->tsdb->pVnode), __func__, committer->ctx->fid);
+  }
+  return code;
+}
+
+static int32_t tsdbCommitFileSet(SCommitter2 *committer) {
+  int32_t code = 0;
+  int32_t lino = 0;
+
+  // fset commit start
+  code = tsdbCommitFileSetBegin(committer);
+  TSDB_CHECK_CODE(code, lino, _exit);
+
+  // commit fset
+  code = tsdbCommitTSData(committer);
+  TSDB_CHECK_CODE(code, 
lino, _exit); + + code = tsdbCommitTombData(committer); + TSDB_CHECK_CODE(code, lino, _exit); + + // fset commit end + code = tsdbCommitFileSetEnd(committer); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(committer->tsdb->pVnode), lino, code); + } else { + tsdbDebug("vgId:%d %s done, fid:%d", TD_VID(committer->tsdb->pVnode), __func__, committer->ctx->fid); + } + return code; +} + +static int32_t tsdbOpenCommitter(STsdb *tsdb, SCommitInfo *info, SCommitter2 *committer) { + int32_t code = 0; + int32_t lino = 0; + + memset(committer, 0, sizeof(committer[0])); + + committer->tsdb = tsdb; + code = tsdbFSCreateCopySnapshot(tsdb->pFS, &committer->fsetArr); + TSDB_CHECK_CODE(code, lino, _exit); + committer->minutes = tsdb->keepCfg.days; + committer->precision = tsdb->keepCfg.precision; + committer->minRow = info->info.config.tsdbCfg.minRows; + committer->maxRow = info->info.config.tsdbCfg.maxRows; + committer->cmprAlg = info->info.config.tsdbCfg.compression; + committer->sttTrigger = info->info.config.sttTrigger; + committer->szPage = info->info.config.tsdbPageSize; + committer->compactVersion = INT64_MAX; + committer->ctx->cid = tsdbFSAllocEid(tsdb->pFS); + committer->ctx->now = taosGetTimestampSec(); + + committer->ctx->nextKey = tsdb->imem->minKey; + if (tsdb->imem->nDel > 0) { + SRBTreeIter iter[1] = {tRBTreeIterCreate(tsdb->imem->tbDataTree, 1)}; + + for (SRBTreeNode *node = tRBTreeIterNext(iter); node; node = tRBTreeIterNext(iter)) { + STbData *tbData = TCONTAINER_OF(node, STbData, rbtn); + + for (SDelData *delData = tbData->pHead; delData; delData = delData->pNext) { + if (delData->sKey < committer->ctx->nextKey) { + committer->ctx->nextKey = delData->sKey; + } + } + } + } + + committer->ctx->maxDelKey = TSKEY_MIN; + TSKEY minKey = TSKEY_MAX; + TSKEY maxKey = TSKEY_MIN; + if (TARRAY2_SIZE(committer->fsetArr) > 0) { + STFileSet *fset = TARRAY2_LAST(committer->fsetArr); + tsdbFidKeyRange(fset->fid, committer->minutes, 
committer->precision, &minKey, &committer->ctx->maxDelKey); + + fset = TARRAY2_FIRST(committer->fsetArr); + tsdbFidKeyRange(fset->fid, committer->minutes, committer->precision, &minKey, &maxKey); + } + + if (committer->ctx->nextKey < TMIN(tsdb->imem->minKey, minKey)) { + committer->ctx->nextKey = TMIN(tsdb->imem->minKey, minKey); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } else { + tsdbDebug("vgId:%d %s done", TD_VID(tsdb->pVnode), __func__); + } + return code; +} + +static int32_t tsdbCloseCommitter(SCommitter2 *committer, int32_t eno) { + int32_t code = 0; + int32_t lino = 0; + + if (eno == 0) { + code = tsdbFSEditBegin(committer->tsdb->pFS, committer->fopArray, TSDB_FEDIT_COMMIT); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + // TODO + ASSERT(0); + } + + ASSERT(committer->writer == NULL); + ASSERT(committer->dataIterMerger == NULL); + ASSERT(committer->tombIterMerger == NULL); + TARRAY2_DESTROY(committer->dataIterArray, NULL); + TARRAY2_DESTROY(committer->tombIterArray, NULL); + TARRAY2_DESTROY(committer->fopArray, NULL); + tsdbFSDestroyCopySnapshot(&committer->fsetArr); + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s, eid:%" PRId64, TD_VID(committer->tsdb->pVnode), __func__, lino, + tstrerror(code), committer->ctx->cid); + } else { + tsdbDebug("vgId:%d %s done, eid:%" PRId64, TD_VID(committer->tsdb->pVnode), __func__, committer->ctx->cid); + } + return code; +} + +int32_t tsdbPreCommit(STsdb *tsdb) { + taosThreadRwlockWrlock(&tsdb->rwLock); + ASSERT(tsdb->imem == NULL); + tsdb->imem = tsdb->mem; + tsdb->mem = NULL; + taosThreadRwlockUnlock(&tsdb->rwLock); + return 0; +} + +int32_t tsdbCommitBegin(STsdb *tsdb, SCommitInfo *info) { + if (!tsdb) return 0; + + int32_t code = 0; + int32_t lino = 0; + + SMemTable *imem = tsdb->imem; + int64_t nRow = imem->nRow; + int64_t nDel = imem->nDel; + + if (nRow == 0 && nDel == 0) { + taosThreadRwlockWrlock(&tsdb->rwLock); + tsdb->imem = NULL; + 
taosThreadRwlockUnlock(&tsdb->rwLock); + tsdbUnrefMemTable(imem, NULL, true); + } else { + SCommitter2 committer[1]; + + code = tsdbOpenCommitter(tsdb, info, committer); + TSDB_CHECK_CODE(code, lino, _exit); + + while (committer->ctx->nextKey != TSKEY_MAX) { + code = tsdbCommitFileSet(committer); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbCloseCommitter(committer, code); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } else { + tsdbInfo("vgId:%d %s done, nRow:%" PRId64 " nDel:%" PRId64, TD_VID(tsdb->pVnode), __func__, nRow, nDel); + } + return code; +} + +int32_t tsdbCommitCommit(STsdb *tsdb) { + int32_t code = 0; + int32_t lino = 0; + + if (tsdb->imem == NULL) goto _exit; + + SMemTable *pMemTable = tsdb->imem; + taosThreadRwlockWrlock(&tsdb->rwLock); + code = tsdbFSEditCommit(tsdb->pFS); + if (code) { + taosThreadRwlockUnlock(&tsdb->rwLock); + TSDB_CHECK_CODE(code, lino, _exit); + } + tsdb->imem = NULL; + taosThreadRwlockUnlock(&tsdb->rwLock); + tsdbUnrefMemTable(pMemTable, NULL, true); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } else { + tsdbInfo("vgId:%d %s done", TD_VID(tsdb->pVnode), __func__); + } + return code; +} + +int32_t tsdbCommitAbort(STsdb *pTsdb) { + int32_t code = 0; + int32_t lino = 0; + + if (pTsdb->imem == NULL) goto _exit; + + code = tsdbFSEditAbort(pTsdb->pFS); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + tsdbError("vgId:%d, %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbInfo("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); + } + return code; +} diff --git a/source/dnode/vnode/src/tsdb/tsdbCommit2.h b/source/dnode/vnode/src/tsdb/tsdbCommit2.h new file mode 100644 index 0000000000000000000000000000000000000000..41f72f345b4575f90c0632b857e0d0eae6f89a7a --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbCommit2.h @@ -0,0 +1,33 @@ +/* + * 
Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "tsdbDataFileRW.h" +#include "tsdbFS2.h" +#include "tsdbFSetRW.h" +#include "tsdbIter.h" +#include "tsdbSttFileRW.h" + +#ifndef _TSDB_COMMIT_H_ +#define _TSDB_COMMIT_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __cplusplus +} +#endif + +#endif /*_TSDB_COMMIT_H_*/ \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c new file mode 100644 index 0000000000000000000000000000000000000000..dc5e3649cc3b737b4b0a0dc340751cd2ce7853d3 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c @@ -0,0 +1,1696 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */
+
+#include "tsdbDataFileRW.h"
+
+extern int32_t tsdbFileWriteTombBlock(STsdbFD *fd, STombBlock *tombBlock, int8_t cmprAlg, int64_t *fileSize,
+                                      TTombBlkArray *tombBlkArray, uint8_t **bufArr);
+extern int32_t tsdbFileWriteTombBlk(STsdbFD *fd, const TTombBlkArray *tombBlkArray, SFDataPtr *ptr, int64_t *fileSize);
+
+// SDataFileReader =============================================
+struct SDataFileReader {
+  SDataFileReaderConfig config[1];
+
+  uint8_t *bufArr[5];
+
+  struct {
+    bool headFooterLoaded;
+    bool tombFooterLoaded;
+    bool brinBlkLoaded;
+    bool tombBlkLoaded;
+  } ctx[1];
+
+  STsdbFD *fd[TSDB_FTYPE_MAX];
+
+  SHeadFooter headFooter[1];
+  STombFooter tombFooter[1];
+  TBrinBlkArray brinBlkArray[1];
+  TTombBlkArray tombBlkArray[1];
+};
+
+static int32_t tsdbDataFileReadHeadFooter(SDataFileReader *reader) {
+  if (reader->ctx->headFooterLoaded) return 0;
+
+  int32_t code = 0;
+  int32_t lino = 0;
+
+  int32_t ftype = TSDB_FTYPE_HEAD;
+  if (reader->fd[ftype]) {
+    code = tsdbReadFile(reader->fd[ftype], reader->config->files[ftype].file.size - sizeof(SHeadFooter),
+                        (uint8_t *)reader->headFooter, sizeof(SHeadFooter));
+    TSDB_CHECK_CODE(code, lino, _exit);
+  }
+
+  reader->ctx->headFooterLoaded = true;
+
+_exit:
+  if (code) {
+    TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code);
+  }
+  return code;
+}
+
+static int32_t tsdbDataFileReadTombFooter(SDataFileReader *reader) {
+  if (reader->ctx->tombFooterLoaded) return 0;
+
+  int32_t code = 0;
+  int32_t lino = 0;
+
+  int32_t ftype = TSDB_FTYPE_TOMB;
+  if (reader->fd[ftype]) {
+    code = tsdbReadFile(reader->fd[ftype], reader->config->files[ftype].file.size - sizeof(STombFooter),
+                        (uint8_t *)reader->tombFooter, sizeof(STombFooter));
+    TSDB_CHECK_CODE(code, lino, _exit);
+  }
+  reader->ctx->tombFooterLoaded = true;
+
+_exit:
+  if (code) {
+    TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code);
+  }
+  return code;
+}
+
+/*
+ * Allocate a data file reader and open its files for reading.
+ *
+ * fname  - optional array of TSDB_FTYPE_MAX file names; NULL entries are
+ *          skipped. When fname itself is NULL, the names are built with
+ *          tsdbTFileName() for every config->files[] entry marked as existing.
+ * config - copied into the reader; when config->bufArr is NULL the reader's
+ *          own bufArr is used instead.
+ * reader - out: the new reader on success, NULL on failure.
+ *
+ * Returns 0 on success or an error code. On failure the reader and every
+ * file opened so far are released here, so the caller has nothing to close.
+ */
+int32_t tsdbDataFileReaderOpen(const char *fname[], const SDataFileReaderConfig *config, SDataFileReader **reader) {
+  int32_t code = 0;
+  int32_t lino = 0;
+
+  reader[0] = taosMemoryCalloc(1, sizeof(**reader));
+  if (reader[0] == NULL) {
+    code = TSDB_CODE_OUT_OF_MEMORY;
+    TSDB_CHECK_CODE(code, lino, _exit);
+  }
+
+  reader[0]->config[0] = config[0];
+  if (reader[0]->config->bufArr == NULL) {
+    reader[0]->config->bufArr = reader[0]->bufArr;
+  }
+
+  if (fname) {
+    for (int32_t i = 0; i < TSDB_FTYPE_MAX; ++i) {
+      if (fname[i]) {
+        code = tsdbOpenFile(fname[i], config->szPage, TD_FILE_READ, &reader[0]->fd[i]);
+        TSDB_CHECK_CODE(code, lino, _exit);
+      }
+    }
+  } else {
+    for (int32_t i = 0; i < TSDB_FTYPE_MAX; ++i) {
+      if (config->files[i].exist) {
+        char fname1[TSDB_FILENAME_LEN];
+        tsdbTFileName(config->tsdb, &config->files[i].file, fname1);
+        code = tsdbOpenFile(fname1, config->szPage, TD_FILE_READ, &reader[0]->fd[i]);
+        TSDB_CHECK_CODE(code, lino, _exit);
+      }
+    }
+  }
+
+_exit:
+  if (code) {
+    TSDB_ERROR_LOG(TD_VID(config->tsdb->pVnode), lino, code);
+    // Do not leak the half-built reader: this closes the files opened so far,
+    // frees the reader and resets reader[0] to NULL (a NULL reader[0], as after
+    // a failed allocation, is accepted by tsdbDataFileReaderClose()).
+    tsdbDataFileReaderClose(reader);
+  }
+  return code;
+}
+
+int32_t tsdbDataFileReaderClose(SDataFileReader **reader) {
+  if (reader[0] == NULL) return 0;
+
+  TARRAY2_DESTROY(reader[0]->tombBlkArray, NULL);
+  TARRAY2_DESTROY(reader[0]->brinBlkArray, NULL);
+
+#if 0
+  TARRAY2_DESTROY(reader[0]->dataBlkArray, NULL);
+  TARRAY2_DESTROY(reader[0]->blockIdxArray, NULL);
+#endif
+
+  for (int32_t i = 0; i < TSDB_FTYPE_MAX; ++i) {
+    if (reader[0]->fd[i]) {
+      tsdbCloseFile(&reader[0]->fd[i]);
+    }
+  }
+
+  for (int32_t i = 0; i < ARRAY_SIZE(reader[0]->bufArr); ++i) {
+    tFree(reader[0]->bufArr[i]);
+  }
+
+  taosMemoryFree(reader[0]);
+  reader[0] = NULL;
+  return 0;
+}
+
+int32_t tsdbDataFileReadBrinBlk(SDataFileReader *reader, const TBrinBlkArray **brinBlkArray) {
+  int32_t code = 0;
+  int32_t lino = 0;
+
+  if (!reader->ctx->brinBlkLoaded) {
+    code = tsdbDataFileReadHeadFooter(reader);
+    TSDB_CHECK_CODE(code, lino, _exit);
+
+    if (reader->headFooter->brinBlkPtr->size > 0) {
+      void *data = taosMemoryMalloc(reader->headFooter->brinBlkPtr->size);
+      if 
(data == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbReadFile(reader->fd[TSDB_FTYPE_HEAD], reader->headFooter->brinBlkPtr->offset, data, + reader->headFooter->brinBlkPtr->size); + if (code) { + taosMemoryFree(data); + TSDB_CHECK_CODE(code, lino, _exit); + } + + int32_t size = reader->headFooter->brinBlkPtr->size / sizeof(SBrinBlk); + TARRAY2_INIT_EX(reader->brinBlkArray, size, size, data); + } else { + TARRAY2_INIT(reader->brinBlkArray); + } + + reader->ctx->brinBlkLoaded = true; + } + brinBlkArray[0] = reader->brinBlkArray; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbDataFileReadBrinBlock(SDataFileReader *reader, const SBrinBlk *brinBlk, SBrinBlock *brinBlock) { + int32_t code = 0; + int32_t lino = 0; + + code = tRealloc(&reader->config->bufArr[0], brinBlk->dp->size); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbReadFile(reader->fd[TSDB_FTYPE_HEAD], brinBlk->dp->offset, reader->config->bufArr[0], brinBlk->dp->size); + TSDB_CHECK_CODE(code, lino, _exit); + + int32_t size = 0; + tBrinBlockClear(brinBlock); + for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr1); i++) { + code = tsdbDecmprData(reader->config->bufArr[0] + size, brinBlk->size[i], TSDB_DATA_TYPE_BIGINT, brinBlk->cmprAlg, + &reader->config->bufArr[1], brinBlk->numRec * sizeof(int64_t), &reader->config->bufArr[2]); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND_BATCH(&brinBlock->dataArr1[i], reader->config->bufArr[1], brinBlk->numRec); + TSDB_CHECK_CODE(code, lino, _exit); + + size += brinBlk->size[i]; + } + + for (int32_t i = 0, j = ARRAY_SIZE(brinBlock->dataArr1); i < ARRAY_SIZE(brinBlock->dataArr2); i++, j++) { + code = tsdbDecmprData(reader->config->bufArr[0] + size, brinBlk->size[j], TSDB_DATA_TYPE_INT, brinBlk->cmprAlg, + &reader->config->bufArr[1], brinBlk->numRec * sizeof(int32_t), &reader->config->bufArr[2]); + TSDB_CHECK_CODE(code, 
lino, _exit); + + code = TARRAY2_APPEND_BATCH(&brinBlock->dataArr2[i], reader->config->bufArr[1], brinBlk->numRec); + TSDB_CHECK_CODE(code, lino, _exit); + + size += brinBlk->size[j]; + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbDataFileReadBlockData(SDataFileReader *reader, const SBrinRecord *record, SBlockData *bData) { + int32_t code = 0; + int32_t lino = 0; + + code = tRealloc(&reader->config->bufArr[0], record->blockSize); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbReadFile(reader->fd[TSDB_FTYPE_DATA], record->blockOffset, reader->config->bufArr[0], record->blockSize); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tDecmprBlockData(reader->config->bufArr[0], record->blockSize, bData, &reader->config->bufArr[1]); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbDataFileReadBlockDataByColumn(SDataFileReader *reader, const SBrinRecord *record, SBlockData *bData, + STSchema *pTSchema, int16_t cids[], int32_t ncid) { + int32_t code = 0; + int32_t lino = 0; + + code = tBlockDataInit(bData, (TABLEID *)record, pTSchema, cids, ncid); + TSDB_CHECK_CODE(code, lino, _exit); + + // uid + version + tskey + code = tRealloc(&reader->config->bufArr[0], record->blockKeySize); + TSDB_CHECK_CODE(code, lino, _exit); + + code = + tsdbReadFile(reader->fd[TSDB_FTYPE_DATA], record->blockOffset, reader->config->bufArr[0], record->blockKeySize); + TSDB_CHECK_CODE(code, lino, _exit); + + // hdr + SDiskDataHdr hdr[1]; + int32_t size = 0; + + size += tGetDiskDataHdr(reader->config->bufArr[0] + size, hdr); + + ASSERT(hdr->delimiter == TSDB_FILE_DLMT); + ASSERT(record->uid == hdr->uid); + + bData->nRow = hdr->nRow; + + // uid + ASSERT(hdr->uid); + + // version + code = tsdbDecmprData(reader->config->bufArr[0] + size, hdr->szVer, TSDB_DATA_TYPE_BIGINT, hdr->cmprAlg, + (uint8_t 
**)&bData->aVersion, sizeof(int64_t) * hdr->nRow, &reader->config->bufArr[1]); + TSDB_CHECK_CODE(code, lino, _exit); + size += hdr->szVer; + + // ts + code = tsdbDecmprData(reader->config->bufArr[0] + size, hdr->szKey, TSDB_DATA_TYPE_TIMESTAMP, hdr->cmprAlg, + (uint8_t **)&bData->aTSKEY, sizeof(TSKEY) * hdr->nRow, &reader->config->bufArr[1]); + TSDB_CHECK_CODE(code, lino, _exit); + size += hdr->szKey; + + ASSERT(size == record->blockKeySize); + + // other columns + if (bData->nColData > 0) { + if (hdr->szBlkCol > 0) { + code = tRealloc(&reader->config->bufArr[0], hdr->szBlkCol); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbReadFile(reader->fd[TSDB_FTYPE_DATA], record->blockOffset + record->blockKeySize, + reader->config->bufArr[0], hdr->szBlkCol); + TSDB_CHECK_CODE(code, lino, _exit); + } + + SBlockCol bc[1] = {{.cid = 0}}; + SBlockCol *blockCol = bc; + + size = 0; + for (int32_t i = 0; i < bData->nColData; i++) { + SColData *colData = tBlockDataGetColDataByIdx(bData, i); + + while (blockCol && blockCol->cid < colData->cid) { + if (size < hdr->szBlkCol) { + size += tGetBlockCol(reader->config->bufArr[0] + size, blockCol); + } else { + ASSERT(size == hdr->szBlkCol); + blockCol = NULL; + } + } + + if (blockCol == NULL || blockCol->cid > colData->cid) { + for (int32_t iRow = 0; iRow < hdr->nRow; iRow++) { + code = tColDataAppendValue(colData, &COL_VAL_NONE(colData->cid, colData->type)); + TSDB_CHECK_CODE(code, lino, _exit); + } + } else { + ASSERT(blockCol->type == colData->type); + ASSERT(blockCol->flag && blockCol->flag != HAS_NONE); + + if (blockCol->flag == HAS_NULL) { + for (int32_t iRow = 0; iRow < hdr->nRow; iRow++) { + code = tColDataAppendValue(colData, &COL_VAL_NULL(blockCol->cid, blockCol->type)); + TSDB_CHECK_CODE(code, lino, _exit); + } + } else { + int32_t size1 = blockCol->szBitmap + blockCol->szOffset + blockCol->szValue; + + code = tRealloc(&reader->config->bufArr[1], size1); + TSDB_CHECK_CODE(code, lino, _exit); + + code = 
tsdbReadFile(reader->fd[TSDB_FTYPE_DATA], + record->blockOffset + record->blockKeySize + hdr->szBlkCol + blockCol->offset, + reader->config->bufArr[1], size1); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDecmprColData(reader->config->bufArr[1], blockCol, hdr->cmprAlg, hdr->nRow, colData, + &reader->config->bufArr[2]); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + } + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbDataFileReadBlockSma(SDataFileReader *reader, const SBrinRecord *record, + TColumnDataAggArray *columnDataAggArray) { + int32_t code = 0; + int32_t lino = 0; + + TARRAY2_CLEAR(columnDataAggArray, NULL); + if (record->smaSize > 0) { + code = tRealloc(&reader->config->bufArr[0], record->smaSize); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbReadFile(reader->fd[TSDB_FTYPE_SMA], record->smaOffset, reader->config->bufArr[0], record->smaSize); + TSDB_CHECK_CODE(code, lino, _exit); + + // decode sma data + int32_t size = 0; + while (size < record->smaSize) { + SColumnDataAgg sma[1]; + + size += tGetColumnDataAgg(reader->config->bufArr[0] + size, sma); + + code = TARRAY2_APPEND_PTR(columnDataAggArray, sma); + TSDB_CHECK_CODE(code, lino, _exit); + } + ASSERT(size == record->smaSize); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbDataFileReadTombBlk(SDataFileReader *reader, const TTombBlkArray **tombBlkArray) { + int32_t code = 0; + int32_t lino = 0; + + if (!reader->ctx->tombBlkLoaded) { + code = tsdbDataFileReadTombFooter(reader); + TSDB_CHECK_CODE(code, lino, _exit); + + if (reader->tombFooter->tombBlkPtr->size > 0) { + void *data = taosMemoryMalloc(reader->tombFooter->tombBlkPtr->size); + if (data == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbReadFile(reader->fd[TSDB_FTYPE_TOMB], reader->tombFooter->tombBlkPtr->offset, 
data, + reader->tombFooter->tombBlkPtr->size); + if (code) { + taosMemoryFree(data); + TSDB_CHECK_CODE(code, lino, _exit); + } + + int32_t size = reader->tombFooter->tombBlkPtr->size / sizeof(STombBlk); + TARRAY2_INIT_EX(reader->tombBlkArray, size, size, data); + } else { + TARRAY2_INIT(reader->tombBlkArray); + } + + reader->ctx->tombBlkLoaded = true; + } + tombBlkArray[0] = reader->tombBlkArray; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbDataFileReadTombBlock(SDataFileReader *reader, const STombBlk *tombBlk, STombBlock *tData) { + int32_t code = 0; + int32_t lino = 0; + + code = tRealloc(&reader->config->bufArr[0], tombBlk->dp->size); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbReadFile(reader->fd[TSDB_FTYPE_TOMB], tombBlk->dp->offset, reader->config->bufArr[0], tombBlk->dp->size); + TSDB_CHECK_CODE(code, lino, _exit); + + int32_t size = 0; + tTombBlockClear(tData); + for (int32_t i = 0; i < ARRAY_SIZE(tData->dataArr); ++i) { + code = tsdbDecmprData(reader->config->bufArr[0] + size, tombBlk->size[i], TSDB_DATA_TYPE_BIGINT, tombBlk->cmprAlg, + &reader->config->bufArr[1], sizeof(int64_t) * tombBlk->numRec, &reader->config->bufArr[2]); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND_BATCH(&tData->dataArr[i], reader->config->bufArr[1], tombBlk->numRec); + TSDB_CHECK_CODE(code, lino, _exit); + + size += tombBlk->size[i]; + } + ASSERT(size == tombBlk->dp->size); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code); + } + return code; +} + +// SDataFileWriter ============================================= +struct SDataFileWriter { + SDataFileWriterConfig config[1]; + + SSkmInfo skmTb[1]; + SSkmInfo skmRow[1]; + uint8_t *bufArr[5]; + + struct { + bool opened; + SDataFileReader *reader; + + // for ts data + TABLEID tbid[1]; + bool tbHasOldData; + + const TBrinBlkArray *brinBlkArray; + int32_t brinBlkArrayIdx; + SBrinBlock 
brinBlock[1]; + int32_t brinBlockIdx; + SBlockData blockData[1]; + int32_t blockDataIdx; + // for tomb data + bool hasOldTomb; + const TTombBlkArray *tombBlkArray; + int32_t tombBlkArrayIdx; + STombBlock tombBlock[1]; + int32_t tombBlockIdx; + } ctx[1]; + + STFile files[TSDB_FTYPE_MAX]; + STsdbFD *fd[TSDB_FTYPE_MAX]; + + SHeadFooter headFooter[1]; + STombFooter tombFooter[1]; + + TBrinBlkArray brinBlkArray[1]; + SBrinBlock brinBlock[1]; + SBlockData blockData[1]; + + TTombBlkArray tombBlkArray[1]; + STombBlock tombBlock[1]; +}; + +static int32_t tsdbDataFileWriterCloseAbort(SDataFileWriter *writer) { + ASSERT(0); + return 0; +} + +static int32_t tsdbDataFileWriterDoClose(SDataFileWriter *writer) { + if (writer->ctx->reader) { + tsdbDataFileReaderClose(&writer->ctx->reader); + } + + tTombBlockDestroy(writer->tombBlock); + TARRAY2_DESTROY(writer->tombBlkArray, NULL); + tBlockDataDestroy(writer->blockData); + tBrinBlockDestroy(writer->brinBlock); + TARRAY2_DESTROY(writer->brinBlkArray, NULL); + + tTombBlockDestroy(writer->ctx->tombBlock); + tBlockDataDestroy(writer->ctx->blockData); + tBrinBlockDestroy(writer->ctx->brinBlock); + + for (int32_t i = 0; i < ARRAY_SIZE(writer->bufArr); ++i) { + tFree(writer->bufArr[i]); + } + + tDestroyTSchema(writer->skmRow->pTSchema); + tDestroyTSchema(writer->skmTb->pTSchema); + return 0; +} + +static int32_t tsdbDataFileWriterDoOpenReader(SDataFileWriter *writer) { + int32_t code = 0; + int32_t lino = 0; + + for (int32_t i = 0; i < TSDB_FTYPE_MAX; ++i) { + if (writer->config->files[i].exist) { + SDataFileReaderConfig config[1] = {{ + .tsdb = writer->config->tsdb, + .szPage = writer->config->szPage, + .bufArr = writer->config->bufArr, + }}; + + for (int32_t i = 0; i < TSDB_FTYPE_MAX; ++i) { + config->files[i].exist = writer->config->files[i].exist; + if (config->files[i].exist) { + config->files[i].file = writer->config->files[i].file; + } + } + + code = tsdbDataFileReaderOpen(NULL, config, &writer->ctx->reader); + 
TSDB_CHECK_CODE(code, lino, _exit); + break; + } + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDataFileWriterDoOpen(SDataFileWriter *writer) { + int32_t code = 0; + int32_t lino = 0; + int32_t ftype; + + if (!writer->config->skmTb) writer->config->skmTb = writer->skmTb; + if (!writer->config->skmRow) writer->config->skmRow = writer->skmRow; + if (!writer->config->bufArr) writer->config->bufArr = writer->bufArr; + + // open reader + code = tsdbDataFileWriterDoOpenReader(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + // .head + ftype = TSDB_FTYPE_HEAD; + writer->files[ftype] = (STFile){ + .type = ftype, + .did = writer->config->did, + .fid = writer->config->fid, + .cid = writer->config->cid, + .size = 0, + }; + + // .data + ftype = TSDB_FTYPE_DATA; + if (writer->config->files[ftype].exist) { + writer->files[ftype] = writer->config->files[ftype].file; + } else { + writer->files[ftype] = (STFile){ + .type = ftype, + .did = writer->config->did, + .fid = writer->config->fid, + .cid = writer->config->cid, + .size = 0, + }; + } + + // .sma + ftype = TSDB_FTYPE_SMA; + if (writer->config->files[ftype].exist) { + writer->files[ftype] = writer->config->files[ftype].file; + } else { + writer->files[ftype] = (STFile){ + .type = ftype, + .did = writer->config->did, + .fid = writer->config->fid, + .cid = writer->config->cid, + .size = 0, + }; + } + + // .tomb + ftype = TSDB_FTYPE_TOMB; + writer->files[ftype] = (STFile){ + .type = ftype, + .did = writer->config->did, + .fid = writer->config->fid, + .cid = writer->config->cid, + .size = 0, + }; + + writer->ctx->opened = true; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbFileWriteBrinBlock(STsdbFD *fd, SBrinBlock *brinBlock, int8_t cmprAlg, int64_t *fileSize, + TBrinBlkArray *brinBlkArray, uint8_t **bufArr) { + if (BRIN_BLOCK_SIZE(brinBlock) == 0) return 
0; + + int32_t code; + + // get SBrinBlk + SBrinBlk brinBlk[1] = { + { + .dp[0] = + { + .offset = *fileSize, + .size = 0, + }, + .minTbid = + { + .suid = TARRAY2_FIRST(brinBlock->suid), + .uid = TARRAY2_FIRST(brinBlock->uid), + }, + .maxTbid = + { + .suid = TARRAY2_LAST(brinBlock->suid), + .uid = TARRAY2_LAST(brinBlock->uid), + }, + .minVer = TARRAY2_FIRST(brinBlock->minVer), + .maxVer = TARRAY2_FIRST(brinBlock->minVer), + .numRec = BRIN_BLOCK_SIZE(brinBlock), + .cmprAlg = cmprAlg, + }, + }; + + for (int32_t i = 1; i < BRIN_BLOCK_SIZE(brinBlock); i++) { + if (brinBlk->minVer > TARRAY2_GET(brinBlock->minVer, i)) { + brinBlk->minVer = TARRAY2_GET(brinBlock->minVer, i); + } + if (brinBlk->maxVer < TARRAY2_GET(brinBlock->maxVer, i)) { + brinBlk->maxVer = TARRAY2_GET(brinBlock->maxVer, i); + } + } + + // write to file + for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr1); i++) { + code = tsdbCmprData((uint8_t *)TARRAY2_DATA(brinBlock->dataArr1 + i), TARRAY2_DATA_LEN(brinBlock->dataArr1 + i), + TSDB_DATA_TYPE_BIGINT, brinBlk->cmprAlg, &bufArr[0], 0, &brinBlk->size[i], &bufArr[1]); + if (code) return code; + + code = tsdbWriteFile(fd, *fileSize, bufArr[0], brinBlk->size[i]); + if (code) return code; + + brinBlk->dp->size += brinBlk->size[i]; + *fileSize += brinBlk->size[i]; + } + + for (int32_t i = 0, j = ARRAY_SIZE(brinBlock->dataArr1); i < ARRAY_SIZE(brinBlock->dataArr2); i++, j++) { + code = tsdbCmprData((uint8_t *)TARRAY2_DATA(brinBlock->dataArr2 + i), TARRAY2_DATA_LEN(brinBlock->dataArr2 + i), + TSDB_DATA_TYPE_INT, brinBlk->cmprAlg, &bufArr[0], 0, &brinBlk->size[j], &bufArr[1]); + if (code) return code; + + code = tsdbWriteFile(fd, *fileSize, bufArr[0], brinBlk->size[j]); + if (code) return code; + + brinBlk->dp->size += brinBlk->size[j]; + *fileSize += brinBlk->size[j]; + } + +#if 0 + SBrinRecord record; + for (int32_t i = 0; i < BRIN_BLOCK_SIZE(brinBlock); i++) { + tBrinBlockGet(brinBlock, i, &record); + tsdbInfo("write brin block, block num:%04d, idx:%04d 
suid:%ld, uid:%ld, offset:%ld, numRow:%d, count:%d", + TARRAY2_SIZE(brinBlkArray), i, record.suid, record.uid, record.blockOffset, record.numRow, record.count); + } +#endif + + // append to brinBlkArray + code = TARRAY2_APPEND_PTR(brinBlkArray, brinBlk); + if (code) return code; + + tBrinBlockClear(brinBlock); + + return 0; +} + +static int32_t tsdbDataFileWriteBrinBlock(SDataFileWriter *writer) { + if (BRIN_BLOCK_SIZE(writer->brinBlock) == 0) return 0; + + int32_t code = 0; + int32_t lino = 0; + + code = tsdbFileWriteBrinBlock(writer->fd[TSDB_FTYPE_HEAD], writer->brinBlock, writer->config->cmprAlg, + &writer->files[TSDB_FTYPE_HEAD].size, writer->brinBlkArray, writer->config->bufArr); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDataFileWriteBrinRecord(SDataFileWriter *writer, const SBrinRecord *record) { + int32_t code = 0; + int32_t lino = 0; + + code = tBrinBlockPut(writer->brinBlock, record); + TSDB_CHECK_CODE(code, lino, _exit); + + if (BRIN_BLOCK_SIZE(writer->brinBlock) >= writer->config->maxRow) { + code = tsdbDataFileWriteBrinBlock(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDataFileDoWriteBlockData(SDataFileWriter *writer, SBlockData *bData) { + if (bData->nRow == 0) return 0; + + ASSERT(bData->uid); + + int32_t code = 0; + int32_t lino = 0; + + SBrinRecord record[1] = {{ + .suid = bData->suid, + .uid = bData->uid, + .firstKey = bData->aTSKEY[0], + .firstKeyVer = bData->aVersion[0], + .lastKey = bData->aTSKEY[bData->nRow - 1], + .lastKeyVer = bData->aVersion[bData->nRow - 1], + .minVer = bData->aVersion[0], + .maxVer = bData->aVersion[0], + .blockOffset = writer->files[TSDB_FTYPE_DATA].size, + .smaOffset = writer->files[TSDB_FTYPE_SMA].size, + .blockSize = 0, + .blockKeySize = 0, + 
.smaSize = 0, + .numRow = bData->nRow, + .count = 1, + }}; + + for (int32_t i = 1; i < bData->nRow; ++i) { + if (bData->aTSKEY[i] != bData->aTSKEY[i - 1]) { + record->count++; + } + if (bData->aVersion[i] < record->minVer) { + record->minVer = bData->aVersion[i]; + } + if (bData->aVersion[i] > record->maxVer) { + record->maxVer = bData->aVersion[i]; + } + } + + // to .data file + int32_t sizeArr[5] = {0}; + + code = tCmprBlockData(bData, writer->config->cmprAlg, NULL, NULL, writer->config->bufArr, sizeArr); + TSDB_CHECK_CODE(code, lino, _exit); + + record->blockKeySize = sizeArr[3] + sizeArr[2]; + record->blockSize = sizeArr[0] + sizeArr[1] + record->blockKeySize; + + for (int32_t i = 3; i >= 0; --i) { + if (sizeArr[i]) { + code = tsdbWriteFile(writer->fd[TSDB_FTYPE_DATA], writer->files[TSDB_FTYPE_DATA].size, writer->config->bufArr[i], + sizeArr[i]); + TSDB_CHECK_CODE(code, lino, _exit); + writer->files[TSDB_FTYPE_DATA].size += sizeArr[i]; + } + } + + // to .sma file + for (int32_t i = 0; i < bData->nColData; ++i) { + SColData *colData = bData->aColData + i; + if ((!colData->smaOn) || ((colData->flag & HAS_VALUE) == 0)) continue; + + SColumnDataAgg sma[1] = {{.colId = colData->cid}}; + tColDataCalcSMA[colData->type](colData, &sma->sum, &sma->max, &sma->min, &sma->numOfNull); + + int32_t size = tPutColumnDataAgg(NULL, sma); + + code = tRealloc(&writer->config->bufArr[0], record->smaSize + size); + TSDB_CHECK_CODE(code, lino, _exit); + + tPutColumnDataAgg(writer->config->bufArr[0] + record->smaSize, sma); + record->smaSize += size; + } + + if (record->smaSize > 0) { + code = tsdbWriteFile(writer->fd[TSDB_FTYPE_SMA], record->smaOffset, writer->config->bufArr[0], record->smaSize); + TSDB_CHECK_CODE(code, lino, _exit); + writer->files[TSDB_FTYPE_SMA].size += record->smaSize; + } + + // append SBrinRecord + code = tsdbDataFileWriteBrinRecord(writer, record); + TSDB_CHECK_CODE(code, lino, _exit); + + tBlockDataClear(bData); + +_exit: + if (code) { + 
TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDataFileWriteDataBlk(SDataFileWriter *writer, const TDataBlkArray *dataBlkArray) { + if (TARRAY2_SIZE(dataBlkArray) == 0) return 0; + + int32_t code = 0; + int32_t lino = 0; + + int32_t ftype = TSDB_FTYPE_HEAD; + SBlockIdx blockIdx[1] = {{ + .suid = writer->ctx->tbid->suid, + .uid = writer->ctx->tbid->uid, + .offset = writer->files[ftype].size, + .size = TARRAY2_DATA_LEN(dataBlkArray), + }}; + + code = + tsdbWriteFile(writer->fd[ftype], blockIdx->offset, (const uint8_t *)TARRAY2_DATA(dataBlkArray), blockIdx->size); + TSDB_CHECK_CODE(code, lino, _exit); + writer->files[ftype].size += blockIdx->size; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDataFileDoWriteTSRow(SDataFileWriter *writer, TSDBROW *row) { + int32_t code = 0; + int32_t lino = 0; + + // update/append + if (row->type == TSDBROW_ROW_FMT) { + code = tsdbUpdateSkmRow(writer->config->tsdb, writer->ctx->tbid, TSDBROW_SVERSION(row), writer->config->skmRow); + TSDB_CHECK_CODE(code, lino, _exit); + } + + TSDBKEY key[1]; + if (row->type == TSDBROW_ROW_FMT) { + key->ts = row->pTSRow->ts; + key->version = row->version; + } else { + key->ts = row->pBlockData->aTSKEY[row->iRow]; + key->version = row->pBlockData->aVersion[row->iRow]; + } + if (key->version <= writer->config->compactVersion // + && writer->blockData->nRow > 0 // + && writer->blockData->aTSKEY[writer->blockData->nRow - 1] == key->ts // + ) { + code = tBlockDataUpdateRow(writer->blockData, row, writer->config->skmRow->pTSchema); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + if (writer->blockData->nRow >= writer->config->maxRow) { + code = tsdbDataFileDoWriteBlockData(writer, writer->blockData); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tBlockDataAppendRow(writer->blockData, row, writer->config->skmRow->pTSchema, writer->ctx->tbid->uid); + 
TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDataFileDoWriteTableOldData(SDataFileWriter *writer, const TSDBKEY *key) { + if (writer->ctx->tbHasOldData == false) return 0; + + int32_t code = 0; + int32_t lino = 0; + + for (;;) { + for (;;) { + // SBlockData + for (; writer->ctx->blockDataIdx < writer->ctx->blockData->nRow; writer->ctx->blockDataIdx++) { + if (key->ts < writer->ctx->blockData->aTSKEY[writer->ctx->blockDataIdx] // + || (key->ts == writer->ctx->blockData->aTSKEY[writer->ctx->blockDataIdx] && + key->version < writer->ctx->blockData->aVersion[writer->ctx->blockDataIdx])) { + goto _exit; + } else { + TSDBROW row = tsdbRowFromBlockData(writer->ctx->blockData, writer->ctx->blockDataIdx); + code = tsdbDataFileDoWriteTSRow(writer, &row); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + + // SBrinBlock + if (writer->ctx->brinBlockIdx >= BRIN_BLOCK_SIZE(writer->ctx->brinBlock)) { + break; + } + + for (; writer->ctx->brinBlockIdx < BRIN_BLOCK_SIZE(writer->ctx->brinBlock); writer->ctx->brinBlockIdx++) { + if (TARRAY2_GET(writer->ctx->brinBlock->uid, writer->ctx->brinBlockIdx) != writer->ctx->tbid->uid) { + writer->ctx->tbHasOldData = false; + goto _exit; + } + + if (key->ts < TARRAY2_GET(writer->ctx->brinBlock->firstKey, writer->ctx->brinBlockIdx) // + || (key->ts == TARRAY2_GET(writer->ctx->brinBlock->firstKey, writer->ctx->brinBlockIdx) && + key->version < TARRAY2_GET(writer->ctx->brinBlock->firstKeyVer, writer->ctx->brinBlockIdx))) { + goto _exit; + } else { + SBrinRecord record[1]; + tBrinBlockGet(writer->ctx->brinBlock, writer->ctx->brinBlockIdx, record); + if (key->ts > record->lastKey || (key->ts == record->lastKey && key->version > record->maxVer)) { + if (writer->blockData->nRow > 0) { + code = tsdbDataFileDoWriteBlockData(writer, writer->blockData); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = 
tsdbDataFileWriteBrinRecord(writer, record); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + code = tsdbDataFileReadBlockData(writer->ctx->reader, record, writer->ctx->blockData); + TSDB_CHECK_CODE(code, lino, _exit); + + writer->ctx->blockDataIdx = 0; + writer->ctx->brinBlockIdx++; + break; + } + } + } + } + + // SBrinBlk + if (writer->ctx->brinBlkArrayIdx >= TARRAY2_SIZE(writer->ctx->brinBlkArray)) { + writer->ctx->brinBlkArray = NULL; + writer->ctx->tbHasOldData = false; + goto _exit; + } + + for (; writer->ctx->brinBlkArrayIdx < TARRAY2_SIZE(writer->ctx->brinBlkArray); writer->ctx->brinBlkArrayIdx++) { + const SBrinBlk *brinBlk = TARRAY2_GET_PTR(writer->ctx->brinBlkArray, writer->ctx->brinBlkArrayIdx); + + if (brinBlk->minTbid.uid != writer->ctx->tbid->uid) { + writer->ctx->tbHasOldData = false; + goto _exit; + } + + code = tsdbDataFileReadBrinBlock(writer->ctx->reader, brinBlk, writer->ctx->brinBlock); + TSDB_CHECK_CODE(code, lino, _exit); + + writer->ctx->brinBlockIdx = 0; + writer->ctx->brinBlkArrayIdx++; + break; + } + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDataFileDoWriteTSData(SDataFileWriter *writer, TSDBROW *row) { + int32_t code = 0; + int32_t lino = 0; + + if (writer->ctx->tbHasOldData) { + TSDBKEY key[1]; + if (row->type == TSDBROW_ROW_FMT) { + key->ts = row->pTSRow->ts; + key->version = row->version; + } else { + key->ts = row->pBlockData->aTSKEY[row->iRow]; + key->version = row->pBlockData->aVersion[row->iRow]; + } + + code = tsdbDataFileDoWriteTableOldData(writer, key); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbDataFileDoWriteTSRow(writer, row); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDataFileWriteTableDataEnd(SDataFileWriter *writer) { + if (writer->ctx->tbid->uid == 0) return 0; + + int32_t code = 
0; + int32_t lino = 0; + + if (writer->ctx->tbHasOldData) { + TSDBKEY key = { + .ts = TSKEY_MAX, + .version = VERSION_MAX, + }; + + code = tsdbDataFileDoWriteTableOldData(writer, &key); + TSDB_CHECK_CODE(code, lino, _exit); + + ASSERT(writer->ctx->tbHasOldData == false); + } + + code = tsdbDataFileDoWriteBlockData(writer, writer->blockData); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDataFileWriteTableDataBegin(SDataFileWriter *writer, const TABLEID *tbid) { + int32_t code = 0; + int32_t lino = 0; + + ASSERT(writer->ctx->blockDataIdx == writer->ctx->blockData->nRow); + ASSERT(writer->blockData->nRow == 0); + + SMetaInfo info; + bool drop = false; + TABLEID tbid1[1]; + writer->ctx->tbHasOldData = false; + while (writer->ctx->brinBlkArray) { // skip data of previous table + for (; writer->ctx->brinBlockIdx < BRIN_BLOCK_SIZE(writer->ctx->brinBlock); writer->ctx->brinBlockIdx++) { + TABLEID tbid2[1] = {{ + .suid = TARRAY2_GET(writer->ctx->brinBlock->suid, writer->ctx->brinBlockIdx), + .uid = TARRAY2_GET(writer->ctx->brinBlock->uid, writer->ctx->brinBlockIdx), + }}; + + if (tbid2->uid == tbid->uid) { + writer->ctx->tbHasOldData = true; + goto _begin; + } else if (tbid2->suid > tbid->suid || (tbid2->suid == tbid->suid && tbid2->uid > tbid->uid)) { + goto _begin; + } else { + if (tbid2->uid != writer->ctx->tbid->uid) { + if (drop && tbid1->uid == tbid2->uid) { + continue; + } else if (metaGetInfo(writer->config->tsdb->pVnode->pMeta, tbid2->uid, &info, NULL) != 0) { + drop = true; + *tbid1 = *tbid2; + continue; + } else { + drop = false; + writer->ctx->tbid[0] = *tbid2; + } + } + + SBrinRecord record[1]; + tBrinBlockGet(writer->ctx->brinBlock, writer->ctx->brinBlockIdx, record); + + code = tsdbDataFileWriteBrinRecord(writer, record); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + + if (writer->ctx->brinBlkArrayIdx >= 
TARRAY2_SIZE(writer->ctx->brinBlkArray)) { + writer->ctx->brinBlkArray = NULL; + break; + } + + for (; writer->ctx->brinBlkArrayIdx < TARRAY2_SIZE(writer->ctx->brinBlkArray); writer->ctx->brinBlkArrayIdx++) { + const SBrinBlk *brinBlk = TARRAY2_GET_PTR(writer->ctx->brinBlkArray, writer->ctx->brinBlkArrayIdx); + + code = tsdbDataFileReadBrinBlock(writer->ctx->reader, brinBlk, writer->ctx->brinBlock); + TSDB_CHECK_CODE(code, lino, _exit); + + writer->ctx->brinBlockIdx = 0; + writer->ctx->brinBlkArrayIdx++; + break; + } + } + +_begin: + writer->ctx->tbid[0] = *tbid; + + if (tbid->uid == INT64_MAX) goto _exit; + + code = tsdbUpdateSkmTb(writer->config->tsdb, tbid, writer->config->skmTb); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tBlockDataInit(writer->blockData, writer->ctx->tbid, writer->config->skmTb->pTSchema, NULL, 0); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbFileWriteHeadFooter(STsdbFD *fd, int64_t *fileSize, const SHeadFooter *footer) { + int32_t code = tsdbWriteFile(fd, *fileSize, (const uint8_t *)footer, sizeof(*footer)); + if (code) return code; + *fileSize += sizeof(*footer); + return 0; +} + +static int32_t tsdbDataFileWriteHeadFooter(SDataFileWriter *writer) { + int32_t code = 0; + int32_t lino = 0; + + code = tsdbFileWriteHeadFooter(writer->fd[TSDB_FTYPE_HEAD], &writer->files[TSDB_FTYPE_HEAD].size, writer->headFooter); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDataFileDoWriteTombBlock(SDataFileWriter *writer) { + if (TOMB_BLOCK_SIZE(writer->tombBlock) == 0) return 0; + + int32_t code = 0; + int32_t lino = 0; + + code = tsdbFileWriteTombBlock(writer->fd[TSDB_FTYPE_TOMB], writer->tombBlock, writer->config->cmprAlg, + &writer->files[TSDB_FTYPE_TOMB].size, writer->tombBlkArray, 
writer->config->bufArr); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDataFileDoWriteTombBlk(SDataFileWriter *writer) { + ASSERT(TARRAY2_SIZE(writer->tombBlkArray) > 0); + + int32_t code = 0; + int32_t lino = 0; + + code = tsdbFileWriteTombBlk(writer->fd[TSDB_FTYPE_TOMB], writer->tombBlkArray, writer->tombFooter->tombBlkPtr, + &writer->files[TSDB_FTYPE_TOMB].size); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbFileWriteTombFooter(STsdbFD *fd, const STombFooter *footer, int64_t *fileSize) { + int32_t code = tsdbWriteFile(fd, *fileSize, (const uint8_t *)footer, sizeof(*footer)); + if (code) return code; + *fileSize += sizeof(*footer); + return 0; +} + +static int32_t tsdbDataFileWriteTombFooter(SDataFileWriter *writer) { + int32_t code = 0; + int32_t lino = 0; + + code = tsdbFileWriteTombFooter(writer->fd[TSDB_FTYPE_TOMB], writer->tombFooter, &writer->files[TSDB_FTYPE_TOMB].size); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDataFileDoWriteTombRecord(SDataFileWriter *writer, const STombRecord *record) { + int32_t code = 0; + int32_t lino = 0; + + while (writer->ctx->hasOldTomb) { + for (; writer->ctx->tombBlockIdx < TOMB_BLOCK_SIZE(writer->ctx->tombBlock); writer->ctx->tombBlockIdx++) { + STombRecord record1[1]; + tTombBlockGet(writer->ctx->tombBlock, writer->ctx->tombBlockIdx, record1); + + int32_t c = tTombRecordCompare(record, record1); + if (c < 0) { + goto _write; + } else if (c > 0) { + code = tTombBlockPut(writer->tombBlock, record1); + TSDB_CHECK_CODE(code, lino, _exit); + + tsdbTrace("vgId:%d write tomb record to tomb file:%s, cid:%" PRId64 ", suid:%" PRId64 ", uid:%" PRId64 + ", 
version:%" PRId64, + TD_VID(writer->config->tsdb->pVnode), writer->fd[TSDB_FTYPE_TOMB]->path, writer->config->cid, + record1->suid, record1->uid, record1->version); + + if (TOMB_BLOCK_SIZE(writer->tombBlock) >= writer->config->maxRow) { + code = tsdbDataFileDoWriteTombBlock(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } + } else { + ASSERT(0); + } + } + + if (writer->ctx->tombBlkArrayIdx >= TARRAY2_SIZE(writer->ctx->tombBlkArray)) { + writer->ctx->hasOldTomb = false; + break; + } + + for (; writer->ctx->tombBlkArrayIdx < TARRAY2_SIZE(writer->ctx->tombBlkArray); ++writer->ctx->tombBlkArrayIdx) { + const STombBlk *tombBlk = TARRAY2_GET_PTR(writer->ctx->tombBlkArray, writer->ctx->tombBlkArrayIdx); + + code = tsdbDataFileReadTombBlock(writer->ctx->reader, tombBlk, writer->ctx->tombBlock); + TSDB_CHECK_CODE(code, lino, _exit); + + writer->ctx->tombBlockIdx = 0; + writer->ctx->tombBlkArrayIdx++; + break; + } + } + +_write: + if (record->suid == INT64_MAX) goto _exit; + + code = tTombBlockPut(writer->tombBlock, record); + TSDB_CHECK_CODE(code, lino, _exit); + + tsdbTrace("vgId:%d write tomb record to tomb file:%s, cid:%" PRId64 ", suid:%" PRId64 ", uid:%" PRId64 + ", version:%" PRId64, + TD_VID(writer->config->tsdb->pVnode), writer->fd[TSDB_FTYPE_TOMB]->path, writer->config->cid, record->suid, + record->uid, record->version); + + if (TOMB_BLOCK_SIZE(writer->tombBlock) >= writer->config->maxRow) { + code = tsdbDataFileDoWriteTombBlock(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbFileWriteBrinBlk(STsdbFD *fd, TBrinBlkArray *brinBlkArray, SFDataPtr *ptr, int64_t *fileSize) { + ASSERT(TARRAY2_SIZE(brinBlkArray) > 0); + ptr->offset = *fileSize; + ptr->size = TARRAY2_DATA_LEN(brinBlkArray); + + int32_t code = tsdbWriteFile(fd, ptr->offset, (uint8_t *)TARRAY2_DATA(brinBlkArray), ptr->size); + if (code) return code; + + *fileSize += ptr->size; 
+ return 0; +} + +static int32_t tsdbDataFileWriteBrinBlk(SDataFileWriter *writer) { + int32_t code = 0; + int32_t lino = 0; + + code = tsdbFileWriteBrinBlk(writer->fd[TSDB_FTYPE_HEAD], writer->brinBlkArray, writer->headFooter->brinBlkPtr, + &writer->files[TSDB_FTYPE_HEAD].size); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDataFileWriterCloseCommit(SDataFileWriter *writer, TFileOpArray *opArr) { + int32_t code = 0; + int32_t lino = 0; + + int32_t ftype; + STFileOp op; + + if (writer->fd[TSDB_FTYPE_HEAD]) { + TABLEID tbid[1] = {{ + .suid = INT64_MAX, + .uid = INT64_MAX, + }}; + + code = tsdbDataFileWriteTableDataEnd(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDataFileWriteTableDataBegin(writer, tbid); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDataFileWriteBrinBlock(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDataFileWriteBrinBlk(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDataFileWriteHeadFooter(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + // .head + ftype = TSDB_FTYPE_HEAD; + if (writer->config->files[ftype].exist) { + op = (STFileOp){ + .optype = TSDB_FOP_REMOVE, + .fid = writer->config->fid, + .of = writer->config->files[ftype].file, + }; + code = TARRAY2_APPEND(opArr, op); + TSDB_CHECK_CODE(code, lino, _exit); + } + op = (STFileOp){ + .optype = TSDB_FOP_CREATE, + .fid = writer->config->fid, + .nf = writer->files[ftype], + }; + code = TARRAY2_APPEND(opArr, op); + TSDB_CHECK_CODE(code, lino, _exit); + + // .data + ftype = TSDB_FTYPE_DATA; + if (!writer->config->files[ftype].exist) { + op = (STFileOp){ + .optype = TSDB_FOP_CREATE, + .fid = writer->config->fid, + .nf = writer->files[ftype], + }; + code = TARRAY2_APPEND(opArr, op); + TSDB_CHECK_CODE(code, lino, _exit); + } else if (writer->config->files[ftype].file.size != writer->files[ftype].size) { + op = (STFileOp){ 
+ .optype = TSDB_FOP_MODIFY, + .fid = writer->config->fid, + .of = writer->config->files[ftype].file, + .nf = writer->files[ftype], + }; + code = TARRAY2_APPEND(opArr, op); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // .sma + ftype = TSDB_FTYPE_SMA; + if (!writer->config->files[ftype].exist) { + op = (STFileOp){ + .optype = TSDB_FOP_CREATE, + .fid = writer->config->fid, + .nf = writer->files[ftype], + }; + code = TARRAY2_APPEND(opArr, op); + TSDB_CHECK_CODE(code, lino, _exit); + } else if (writer->config->files[ftype].file.size != writer->files[ftype].size) { + op = (STFileOp){ + .optype = TSDB_FOP_MODIFY, + .fid = writer->config->fid, + .of = writer->config->files[ftype].file, + .nf = writer->files[ftype], + }; + code = TARRAY2_APPEND(opArr, op); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + + if (writer->fd[TSDB_FTYPE_TOMB]) { + STombRecord record[1] = {{ + .suid = INT64_MAX, + .uid = INT64_MAX, + .version = INT64_MAX, + }}; + + code = tsdbDataFileDoWriteTombRecord(writer, record); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDataFileDoWriteTombBlock(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDataFileDoWriteTombBlk(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDataFileWriteTombFooter(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + ftype = TSDB_FTYPE_TOMB; + if (writer->config->files[ftype].exist) { + op = (STFileOp){ + .optype = TSDB_FOP_REMOVE, + .fid = writer->config->fid, + .of = writer->config->files[ftype].file, + }; + code = TARRAY2_APPEND(opArr, op); + TSDB_CHECK_CODE(code, lino, _exit); + } + op = (STFileOp){ + .optype = TSDB_FOP_CREATE, + .fid = writer->config->fid, + .nf = writer->files[ftype], + }; + code = TARRAY2_APPEND(opArr, op); + TSDB_CHECK_CODE(code, lino, _exit); + } + + for (int32_t i = 0; i < TSDB_FTYPE_MAX; ++i) { + if (writer->fd[i]) { + code = tsdbFsyncFile(writer->fd[i]); + TSDB_CHECK_CODE(code, lino, _exit); + tsdbCloseFile(&writer->fd[i]); + } + } + +_exit: + if (code) { + 
TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbDataFileWriterOpenDataFD(SDataFileWriter *writer) { + int32_t code = 0; + int32_t lino = 0; + + int32_t ftypes[] = {TSDB_FTYPE_HEAD, TSDB_FTYPE_DATA, TSDB_FTYPE_SMA}; + + for (int32_t i = 0; i < ARRAY_SIZE(ftypes); ++i) { + int32_t ftype = ftypes[i]; + + char fname[TSDB_FILENAME_LEN]; + int32_t flag = TD_FILE_READ | TD_FILE_WRITE; + + if (writer->files[ftype].size == 0) { + flag |= (TD_FILE_CREATE | TD_FILE_TRUNC); + } + + tsdbTFileName(writer->config->tsdb, &writer->files[ftype], fname); + code = tsdbOpenFile(fname, writer->config->szPage, flag, &writer->fd[ftype]); + TSDB_CHECK_CODE(code, lino, _exit); + + if (writer->files[ftype].size == 0) { + uint8_t hdr[TSDB_FHDR_SIZE] = {0}; + + code = tsdbWriteFile(writer->fd[ftype], 0, hdr, TSDB_FHDR_SIZE); + TSDB_CHECK_CODE(code, lino, _exit); + + writer->files[ftype].size += TSDB_FHDR_SIZE; + } + } + + if (writer->ctx->reader) { + code = tsdbDataFileReadBrinBlk(writer->ctx->reader, &writer->ctx->brinBlkArray); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbDataFileWriterOpen(const SDataFileWriterConfig *config, SDataFileWriter **writer) { + writer[0] = taosMemoryCalloc(1, sizeof(*writer[0])); + if (!writer[0]) return TSDB_CODE_OUT_OF_MEMORY; + + writer[0]->config[0] = config[0]; + return 0; +} + +int32_t tsdbDataFileWriterClose(SDataFileWriter **writer, bool abort, TFileOpArray *opArr) { + if (writer[0] == NULL) return 0; + + int32_t code = 0; + int32_t lino = 0; + + if (writer[0]->ctx->opened) { + if (abort) { + code = tsdbDataFileWriterCloseAbort(writer[0]); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + code = tsdbDataFileWriterCloseCommit(writer[0], opArr); + TSDB_CHECK_CODE(code, lino, _exit); + } + tsdbDataFileWriterDoClose(writer[0]); + } + taosMemoryFree(writer[0]); + writer[0] = 
NULL; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer[0]->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbDataFileWriteRow(SDataFileWriter *writer, SRowInfo *row) { + int32_t code = 0; + int32_t lino = 0; + + if (!writer->ctx->opened) { + code = tsdbDataFileWriterDoOpen(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (writer->fd[TSDB_FTYPE_HEAD] == NULL) { + code = tsdbDataFileWriterOpenDataFD(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (row->uid != writer->ctx->tbid->uid) { + code = tsdbDataFileWriteTableDataEnd(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDataFileWriteTableDataBegin(writer, (TABLEID *)row); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbDataFileDoWriteTSData(writer, &row->row); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbDataFileWriteBlockData(SDataFileWriter *writer, SBlockData *bData) { + if (bData->nRow == 0) return 0; + + int32_t code = 0; + int32_t lino = 0; + + ASSERT(bData->uid); + + if (!writer->ctx->opened) { + code = tsdbDataFileWriterDoOpen(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (writer->fd[TSDB_FTYPE_DATA] == NULL) { + code = tsdbDataFileWriterOpenDataFD(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (bData->uid != writer->ctx->tbid->uid) { + code = tsdbDataFileWriteTableDataEnd(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDataFileWriteTableDataBegin(writer, (TABLEID *)bData); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (writer->ctx->tbHasOldData) { + TSDBKEY key = { + .ts = bData->aTSKEY[0], + .version = bData->aVersion[0], + }; + + code = tsdbDataFileDoWriteTableOldData(writer, &key); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (!writer->ctx->tbHasOldData // + && writer->blockData->nRow == 0 // + ) { + code = tsdbDataFileDoWriteBlockData(writer, bData); + 
TSDB_CHECK_CODE(code, lino, _exit); + } else { + for (int32_t i = 0; i < bData->nRow; ++i) { + TSDBROW row[1] = {tsdbRowFromBlockData(bData, i)}; + code = tsdbDataFileDoWriteTSData(writer, row); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbDataFileFlush(SDataFileWriter *writer) { + ASSERT(writer->ctx->opened); + + if (writer->blockData->nRow == 0) return 0; + if (writer->ctx->tbHasOldData) return 0; + + return tsdbDataFileDoWriteBlockData(writer, writer->blockData); +} + +static int32_t tsdbDataFileWriterOpenTombFD(SDataFileWriter *writer) { + int32_t code = 0; + int32_t lino = 0; + + char fname[TSDB_FILENAME_LEN]; + int32_t ftype = TSDB_FTYPE_TOMB; + + ASSERT(writer->files[ftype].size == 0); + + int32_t flag = (TD_FILE_READ | TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); + + tsdbTFileName(writer->config->tsdb, writer->files + ftype, fname); + code = tsdbOpenFile(fname, writer->config->szPage, flag, &writer->fd[ftype]); + TSDB_CHECK_CODE(code, lino, _exit); + + uint8_t hdr[TSDB_FHDR_SIZE] = {0}; + code = tsdbWriteFile(writer->fd[ftype], 0, hdr, TSDB_FHDR_SIZE); + TSDB_CHECK_CODE(code, lino, _exit); + writer->files[ftype].size += TSDB_FHDR_SIZE; + + if (writer->ctx->reader) { + code = tsdbDataFileReadTombBlk(writer->ctx->reader, &writer->ctx->tombBlkArray); + TSDB_CHECK_CODE(code, lino, _exit); + + if (TARRAY2_SIZE(writer->ctx->tombBlkArray) > 0) { + writer->ctx->hasOldTomb = true; + } + + writer->ctx->tombBlkArrayIdx = 0; + tTombBlockClear(writer->ctx->tombBlock); + writer->ctx->tombBlockIdx = 0; + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbDataFileWriteTombRecord(SDataFileWriter *writer, const STombRecord *record) { + int32_t code = 0; + int32_t lino = 0; + + if (!writer->ctx->opened) { + code = tsdbDataFileWriterDoOpen(writer); + 
TSDB_CHECK_CODE(code, lino, _exit); + } + + if (writer->fd[TSDB_FTYPE_TOMB] == NULL) { + code = tsdbDataFileWriterOpenTombFD(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbDataFileDoWriteTombRecord(writer, record); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h new file mode 100644 index 0000000000000000000000000000000000000000..827b58fb4a247a0d68c606c7c5a4eaa05b4db5c4 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "tsdbDef.h" +#include "tsdbFSet2.h" +#include "tsdbSttFileRW.h" +#include "tsdbUtil2.h" + +#ifndef _TSDB_DATA_FILE_RW_H +#define _TSDB_DATA_FILE_RW_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef TARRAY2(SBlockIdx) TBlockIdxArray; +typedef TARRAY2(SDataBlk) TDataBlkArray; +typedef TARRAY2(SColumnDataAgg) TColumnDataAggArray; + +typedef struct { + SFDataPtr brinBlkPtr[1]; + SFDataPtr rsrvd[2]; +} SHeadFooter; + +typedef struct { + SFDataPtr tombBlkPtr[1]; + SFDataPtr rsrvd[2]; +} STombFooter; + +// SDataFileReader ============================================= +typedef struct SDataFileReader SDataFileReader; +typedef struct SDataFileReaderConfig { + STsdb *tsdb; + int32_t szPage; + struct { + bool exist; + STFile file; + } files[TSDB_FTYPE_MAX]; + uint8_t **bufArr; +} SDataFileReaderConfig; + +int32_t tsdbDataFileReaderOpen(const char *fname[/* TSDB_FTYPE_MAX */], const SDataFileReaderConfig *config, + SDataFileReader **reader); +int32_t tsdbDataFileReaderClose(SDataFileReader **reader); +// .head +int32_t tsdbDataFileReadBrinBlk(SDataFileReader *reader, const TBrinBlkArray **brinBlkArray); +int32_t tsdbDataFileReadBrinBlock(SDataFileReader *reader, const SBrinBlk *brinBlk, SBrinBlock *brinBlock); +// .data +int32_t tsdbDataFileReadBlockData(SDataFileReader *reader, const SBrinRecord *record, SBlockData *bData); +int32_t tsdbDataFileReadBlockDataByColumn(SDataFileReader *reader, const SBrinRecord *record, SBlockData *bData, + STSchema *pTSchema, int16_t cids[], int32_t ncid); +// .sma +int32_t tsdbDataFileReadBlockSma(SDataFileReader *reader, const SBrinRecord *record, + TColumnDataAggArray *columnDataAggArray); +// .tomb +int32_t tsdbDataFileReadTombBlk(SDataFileReader *reader, const TTombBlkArray **tombBlkArray); +int32_t tsdbDataFileReadTombBlock(SDataFileReader *reader, const STombBlk *tombBlk, STombBlock *tData); + +// SDataFileWriter ============================================= +typedef struct SDataFileWriter SDataFileWriter; +typedef 
struct SDataFileWriterConfig { + STsdb *tsdb; + int8_t cmprAlg; + int32_t maxRow; + int32_t szPage; + int32_t fid; + int64_t cid; + SDiskID did; + int64_t compactVersion; + struct { + bool exist; + STFile file; + } files[TSDB_FTYPE_MAX]; + SSkmInfo *skmTb; + SSkmInfo *skmRow; + uint8_t **bufArr; +} SDataFileWriterConfig; + +int32_t tsdbDataFileWriterOpen(const SDataFileWriterConfig *config, SDataFileWriter **writer); +int32_t tsdbDataFileWriterClose(SDataFileWriter **writer, bool abort, TFileOpArray *opArr); + +int32_t tsdbDataFileWriteRow(SDataFileWriter *writer, SRowInfo *row); +int32_t tsdbDataFileWriteBlockData(SDataFileWriter *writer, SBlockData *bData); +int32_t tsdbDataFileFlush(SDataFileWriter *writer); + +int32_t tsdbDataFileWriteTombRecord(SDataFileWriter *writer, const STombRecord *record); + +#ifdef __cplusplus +} +#endif + +#endif /*_TSDB_DATA_FILE_RW_H*/ \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbDef.h b/source/dnode/vnode/src/tsdb/tsdbDef.h new file mode 100644 index 0000000000000000000000000000000000000000..e768f68b15654ab6cc9327795b80783557896c3e --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbDef.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "tarray2.h" +#include "tsdb.h" + +#ifndef _TD_TSDB_DEF_H_ +#define _TD_TSDB_DEF_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#define TSDB_ERROR_LOG(vid, lino, code) \ + tsdbError("vgId:%d %s failed at line %d since %s", vid, __func__, lino, tstrerror(code)) + +typedef struct SFDataPtr { + int64_t offset; + int64_t size; +} SFDataPtr; + +extern int32_t tsdbOpenFile(const char *path, int32_t szPage, int32_t flag, STsdbFD **ppFD); +extern void tsdbCloseFile(STsdbFD **ppFD); +extern int32_t tsdbWriteFile(STsdbFD *pFD, int64_t offset, const uint8_t *pBuf, int64_t size); +extern int32_t tsdbReadFile(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64_t size); +extern int32_t tsdbFsyncFile(STsdbFD *pFD); + +#ifdef __cplusplus +} +#endif + +#endif /*_TD_TSDB_DEF_H_*/ \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbFS.c b/source/dnode/vnode/src/tsdb/tsdbFS.c index 41fdd05741108604e0a63f35b4e87c48bd204b75..ec116c717e05bc3a1be7379b18386c20eb8ef67d 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS.c @@ -181,10 +181,10 @@ static int32_t tsdbScanAndTryFixFS(STsdb *pTsdb) { TSDB_CHECK_CODE(code, lino, _exit); } - if (size != tsdbLogicToFileSize(pTsdb->fs.pDelFile->size, pTsdb->pVnode->config.tsdbPageSize)) { - code = TSDB_CODE_FILE_CORRUPTED; - TSDB_CHECK_CODE(code, lino, _exit); - } + // if (size != tsdbLogicToFileSize(pTsdb->fs.pDelFile->size, pTsdb->pVnode->config.tsdbPageSize)) { + // code = TSDB_CODE_FILE_CORRUPTED; + // TSDB_CHECK_CODE(code, lino, _exit); + // } } // SArray @@ -199,10 +199,10 @@ static int32_t tsdbScanAndTryFixFS(STsdb *pTsdb) { code = TAOS_SYSTEM_ERROR(errno); TSDB_CHECK_CODE(code, lino, _exit); } - if (size != tsdbLogicToFileSize(pSet->pHeadF->size, pTsdb->pVnode->config.tsdbPageSize)) { - code = TSDB_CODE_FILE_CORRUPTED; - TSDB_CHECK_CODE(code, lino, _exit); - } + // if (size != tsdbLogicToFileSize(pSet->pHeadF->size, pTsdb->pVnode->config.tsdbPageSize)) { + // code = 
TSDB_CODE_FILE_CORRUPTED; + // TSDB_CHECK_CODE(code, lino, _exit); + // } // data ========= tsdbDataFileName(pTsdb, pSet->diskId, pSet->fid, pSet->pDataF, fname); @@ -210,10 +210,10 @@ static int32_t tsdbScanAndTryFixFS(STsdb *pTsdb) { code = TAOS_SYSTEM_ERROR(errno); TSDB_CHECK_CODE(code, lino, _exit); } - if (size < tsdbLogicToFileSize(pSet->pDataF->size, pTsdb->pVnode->config.tsdbPageSize)) { - code = TSDB_CODE_FILE_CORRUPTED; - TSDB_CHECK_CODE(code, lino, _exit); - } + // if (size < tsdbLogicToFileSize(pSet->pDataF->size, pTsdb->pVnode->config.tsdbPageSize)) { + // code = TSDB_CODE_FILE_CORRUPTED; + // TSDB_CHECK_CODE(code, lino, _exit); + // } // else if (size > tsdbLogicToFileSize(pSet->pDataF->size, pTsdb->pVnode->config.tsdbPageSize)) { // code = tsdbDFileRollback(pTsdb, pSet, TSDB_DATA_FILE); // TSDB_CHECK_CODE(code, lino, _exit); @@ -225,10 +225,10 @@ static int32_t tsdbScanAndTryFixFS(STsdb *pTsdb) { code = TAOS_SYSTEM_ERROR(errno); TSDB_CHECK_CODE(code, lino, _exit); } - if (size < tsdbLogicToFileSize(pSet->pSmaF->size, pTsdb->pVnode->config.tsdbPageSize)) { - code = TSDB_CODE_FILE_CORRUPTED; - TSDB_CHECK_CODE(code, lino, _exit); - } + // if (size < tsdbLogicToFileSize(pSet->pSmaF->size, pTsdb->pVnode->config.tsdbPageSize)) { + // code = TSDB_CODE_FILE_CORRUPTED; + // TSDB_CHECK_CODE(code, lino, _exit); + // } // else if (size > tsdbLogicToFileSize(pSet->pSmaF->size, pTsdb->pVnode->config.tsdbPageSize)) { // code = tsdbDFileRollback(pTsdb, pSet, TSDB_SMA_FILE); // TSDB_CHECK_CODE(code, lino, _exit); @@ -241,10 +241,10 @@ static int32_t tsdbScanAndTryFixFS(STsdb *pTsdb) { code = TAOS_SYSTEM_ERROR(errno); TSDB_CHECK_CODE(code, lino, _exit); } - if (size != tsdbLogicToFileSize(pSet->aSttF[iStt]->size, pTsdb->pVnode->config.tsdbPageSize)) { - code = TSDB_CODE_FILE_CORRUPTED; - TSDB_CHECK_CODE(code, lino, _exit); - } + // if (size != tsdbLogicToFileSize(pSet->aSttF[iStt]->size, pTsdb->pVnode->config.tsdbPageSize)) { + // code = TSDB_CODE_FILE_CORRUPTED; + // 
TSDB_CHECK_CODE(code, lino, _exit); + // } } } @@ -270,7 +270,7 @@ int32_t tDFileSetCmprFn(const void *p1, const void *p2) { return 0; } -static void tsdbGetCurrentFName(STsdb *pTsdb, char *current, char *current_t) { +void tsdbGetCurrentFName(STsdb *pTsdb, char *current, char *current_t) { SVnode *pVnode = pTsdb->pVnode; int32_t offset = 0; @@ -289,7 +289,7 @@ static void tsdbGetCurrentFName(STsdb *pTsdb, char *current, char *current_t) { } } -static int32_t tsdbLoadFSFromFile(const char *fname, STsdbFS *pFS) { +static int32_t load_fs(const char *fname, STsdbFS *pFS) { int32_t code = 0; int32_t lino = 0; uint8_t *pData = NULL; @@ -666,7 +666,7 @@ static int32_t tsdbFSApplyChange(STsdb *pTsdb, STsdbFS *pFS) { taosArrayRemove(pTsdb->fs.aDFileSet, iOld); } else { code = tsdbNewFileSet(pTsdb, &fSet, pSetNew); - TSDB_CHECK_CODE(code, lino, _exit) + TSDB_CHECK_CODE(code, lino, _exit); if (taosArrayInsert(pTsdb->fs.aDFileSet, iOld, &fSet) == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; @@ -682,7 +682,7 @@ static int32_t tsdbFSApplyChange(STsdb *pTsdb, STsdbFS *pFS) { taosArrayRemove(pTsdb->fs.aDFileSet, iOld); } else { code = tsdbNewFileSet(pTsdb, &fSet, pSetNew); - TSDB_CHECK_CODE(code, lino, _exit) + TSDB_CHECK_CODE(code, lino, _exit); if (taosArrayInsert(pTsdb->fs.aDFileSet, iOld, &fSet) == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; @@ -723,7 +723,7 @@ int32_t tsdbFSCommit(STsdb *pTsdb) { code = tsdbFSCreate(&fs); TSDB_CHECK_CODE(code, lino, _exit); - code = tsdbLoadFSFromFile(current, &fs); + code = load_fs(current, &fs); TSDB_CHECK_CODE(code, lino, _exit); // apply file change @@ -768,7 +768,7 @@ int32_t tsdbFSOpen(STsdb *pTsdb, int8_t rollback) { tsdbGetCurrentFName(pTsdb, current, current_t); if (taosCheckExistFile(current)) { - code = tsdbLoadFSFromFile(current, &pTsdb->fs); + code = load_fs(current, &pTsdb->fs); TSDB_CHECK_CODE(code, lino, _exit); if (taosCheckExistFile(current_t)) { diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c 
b/source/dnode/vnode/src/tsdb/tsdbFS2.c new file mode 100644 index 0000000000000000000000000000000000000000..6e7595c6ef29791811b23ce6d4a617ec077b6958 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -0,0 +1,893 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tsdbFS2.h" +#include "tsdbUpgrade.h" +#include "vnd.h" + +extern int vnodeScheduleTask(int (*execute)(void *), void *arg); +extern int vnodeScheduleTaskEx(int tpid, int (*execute)(void *), void *arg); + +#define TSDB_FS_EDIT_MIN TSDB_FEDIT_COMMIT +#define TSDB_FS_EDIT_MAX (TSDB_FEDIT_MERGE + 1) + +enum { + TSDB_FS_STATE_NONE = 0, + TSDB_FS_STATE_OPEN, + TSDB_FS_STATE_EDIT, + TSDB_FS_STATE_CLOSE, +}; + +static const char *gCurrentFname[] = { + [TSDB_FCURRENT] = "current.json", + [TSDB_FCURRENT_C] = "current.c.json", + [TSDB_FCURRENT_M] = "current.m.json", +}; + +static int32_t create_fs(STsdb *pTsdb, STFileSystem **fs) { + fs[0] = taosMemoryCalloc(1, sizeof(*fs[0])); + if (fs[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; + + fs[0]->tsdb = pTsdb; + tsem_init(&fs[0]->canEdit, 0, 1); + fs[0]->state = TSDB_FS_STATE_NONE; + fs[0]->neid = 0; + TARRAY2_INIT(fs[0]->fSetArr); + TARRAY2_INIT(fs[0]->fSetArrTmp); + + // background task queue + taosThreadMutexInit(fs[0]->mutex, NULL); + fs[0]->bgTaskQueue->next = fs[0]->bgTaskQueue; + fs[0]->bgTaskQueue->prev = fs[0]->bgTaskQueue; + + return 0; +} + +static int32_t destroy_fs(STFileSystem **fs) { + if (fs[0] == 
NULL) return 0; + taosThreadMutexDestroy(fs[0]->mutex); + + ASSERT(fs[0]->bgTaskNum == 0); + + TARRAY2_DESTROY(fs[0]->fSetArr, NULL); + TARRAY2_DESTROY(fs[0]->fSetArrTmp, NULL); + tsem_destroy(&fs[0]->canEdit); + taosMemoryFree(fs[0]); + fs[0] = NULL; + return 0; +} + +int32_t current_fname(STsdb *pTsdb, char *fname, EFCurrentT ftype) { + int32_t offset = 0; + + vnodeGetPrimaryDir(pTsdb->path, pTsdb->pVnode->diskPrimary, pTsdb->pVnode->pTfs, fname, TSDB_FILENAME_LEN); + offset = strlen(fname); + snprintf(fname + offset, TSDB_FILENAME_LEN - offset - 1, "%s%s", TD_DIRSEP, gCurrentFname[ftype]); + + return 0; +} + +static int32_t save_json(const cJSON *json, const char *fname) { + int32_t code = 0; + + char *data = cJSON_PrintUnformatted(json); + if (data == NULL) return TSDB_CODE_OUT_OF_MEMORY; + + TdFilePtr fp = taosOpenFile(fname, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); + if (fp == NULL) { + code = TAOS_SYSTEM_ERROR(code); + goto _exit; + } + + if (taosWriteFile(fp, data, strlen(data)) < 0) { + code = TAOS_SYSTEM_ERROR(code); + goto _exit; + } + + if (taosFsyncFile(fp) < 0) { + code = TAOS_SYSTEM_ERROR(code); + goto _exit; + } + + taosCloseFile(&fp); + +_exit: + taosMemoryFree(data); + return code; +} + +static int32_t load_json(const char *fname, cJSON **json) { + int32_t code = 0; + char *data = NULL; + + TdFilePtr fp = taosOpenFile(fname, TD_FILE_READ); + if (fp == NULL) return TAOS_SYSTEM_ERROR(code); + + int64_t size; + if (taosFStatFile(fp, &size, NULL) < 0) { + code = TAOS_SYSTEM_ERROR(code); + goto _exit; + } + + data = taosMemoryMalloc(size + 1); + if (data == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + + if (taosReadFile(fp, data, size) < 0) { + code = TAOS_SYSTEM_ERROR(code); + goto _exit; + } + data[size] = '\0'; + + json[0] = cJSON_Parse(data); + if (json[0] == NULL) { + code = TSDB_CODE_FILE_CORRUPTED; + goto _exit; + } + +_exit: + taosCloseFile(&fp); + if (data) taosMemoryFree(data); + if (code) json[0] = NULL; + return 
code; +} + +int32_t save_fs(const TFileSetArray *arr, const char *fname) { + int32_t code = 0; + int32_t lino = 0; + + cJSON *json = cJSON_CreateObject(); + if (!json) return TSDB_CODE_OUT_OF_MEMORY; + + // fmtv + if (cJSON_AddNumberToObject(json, "fmtv", 1) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + // fset + cJSON *ajson = cJSON_AddArrayToObject(json, "fset"); + if (!ajson) TSDB_CHECK_CODE(code = TSDB_CODE_OUT_OF_MEMORY, lino, _exit); + const STFileSet *fset; + TARRAY2_FOREACH(arr, fset) { + cJSON *item = cJSON_CreateObject(); + if (!item) TSDB_CHECK_CODE(code = TSDB_CODE_OUT_OF_MEMORY, lino, _exit); + cJSON_AddItemToArray(ajson, item); + + code = tsdbTFileSetToJson(fset, item); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = save_json(json, fname); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + tsdbError("%s failed at line %d since %s", __func__, lino, tstrerror(code)); + } + cJSON_Delete(json); + return code; +} + +static int32_t load_fs(STsdb *pTsdb, const char *fname, TFileSetArray *arr) { + int32_t code = 0; + int32_t lino = 0; + + TARRAY2_CLEAR(arr, tsdbTFileSetClear); + + // load json + cJSON *json = NULL; + code = load_json(fname, &json); + TSDB_CHECK_CODE(code, lino, _exit); + + // parse json + const cJSON *item1; + + /* fmtv */ + item1 = cJSON_GetObjectItem(json, "fmtv"); + if (cJSON_IsNumber(item1)) { + ASSERT(item1->valuedouble == 1); + } else { + TSDB_CHECK_CODE(code = TSDB_CODE_FILE_CORRUPTED, lino, _exit); + } + + /* fset */ + item1 = cJSON_GetObjectItem(json, "fset"); + if (cJSON_IsArray(item1)) { + const cJSON *item2; + cJSON_ArrayForEach(item2, item1) { + STFileSet *fset; + code = tsdbJsonToTFileSet(pTsdb, item2, &fset); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND(arr, fset); + TSDB_CHECK_CODE(code, lino, _exit); + } + } else { + TSDB_CHECK_CODE(code = TSDB_CODE_FILE_CORRUPTED, lino, _exit); + } + +_exit: + if (code) { + tsdbError("%s failed at line %d 
since %s, fname:%s", __func__, lino, tstrerror(code), fname); + } + if (json) cJSON_Delete(json); + return code; +} + +static bool is_same_file(const STFile *f1, const STFile f2) { + if (f1->type != f2.type) return false; + if (f1->did.level != f2.did.level) return false; + if (f1->did.id != f2.did.id) return false; + if (f1->cid != f2.cid) return false; + return true; +} + +static int32_t apply_commit(STFileSystem *fs) { + int32_t code = 0; + TFileSetArray *fsetArray1 = fs->fSetArr; + TFileSetArray *fsetArray2 = fs->fSetArrTmp; + int32_t i1 = 0, i2 = 0; + + while (i1 < TARRAY2_SIZE(fsetArray1) || i2 < TARRAY2_SIZE(fsetArray2)) { + STFileSet *fset1 = i1 < TARRAY2_SIZE(fsetArray1) ? TARRAY2_GET(fsetArray1, i1) : NULL; + STFileSet *fset2 = i2 < TARRAY2_SIZE(fsetArray2) ? TARRAY2_GET(fsetArray2, i2) : NULL; + + if (fset1 && fset2) { + if (fset1->fid < fset2->fid) { + // delete fset1 + TARRAY2_REMOVE(fsetArray1, i1, tsdbTFileSetRemove); + } else if (fset1->fid > fset2->fid) { + // create new file set with fid of fset2->fid + code = tsdbTFileSetInitDup(fs->tsdb, fset2, &fset1); + if (code) return code; + code = TARRAY2_SORT_INSERT(fsetArray1, fset1, tsdbTFileSetCmprFn); + if (code) return code; + i1++; + i2++; + } else { + // edit + code = tsdbTFileSetApplyEdit(fs->tsdb, fset2, fset1); + if (code) return code; + i1++; + i2++; + } + } else if (fset1) { + // delete fset1 + TARRAY2_REMOVE(fsetArray1, i1, tsdbTFileSetRemove); + } else { + // create new file set with fid of fset2->fid + code = tsdbTFileSetInitDup(fs->tsdb, fset2, &fset1); + if (code) return code; + code = TARRAY2_SORT_INSERT(fsetArray1, fset1, tsdbTFileSetCmprFn); + if (code) return code; + i1++; + i2++; + } + } + + return 0; +} + +static int32_t commit_edit(STFileSystem *fs) { + char current[TSDB_FILENAME_LEN]; + char current_t[TSDB_FILENAME_LEN]; + + current_fname(fs->tsdb, current, TSDB_FCURRENT); + if (fs->etype == TSDB_FEDIT_COMMIT) { + current_fname(fs->tsdb, current_t, TSDB_FCURRENT_C); + } else if 
(fs->etype == TSDB_FEDIT_MERGE) { + current_fname(fs->tsdb, current_t, TSDB_FCURRENT_M); + } else { + ASSERT(0); + } + + int32_t code; + int32_t lino; + if ((code = taosRenameFile(current_t, current))) { + TSDB_CHECK_CODE(code = TAOS_SYSTEM_ERROR(code), lino, _exit); + } + + code = apply_commit(fs); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(fs->tsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbInfo("vgId:%d %s success, etype:%d", TD_VID(fs->tsdb->pVnode), __func__, fs->etype); + } + return code; +} + +// static int32_t +static int32_t apply_abort(STFileSystem *fs) { + // TODO + return 0; +} + +static int32_t abort_edit(STFileSystem *fs) { + char fname[TSDB_FILENAME_LEN]; + + if (fs->etype == TSDB_FEDIT_COMMIT) { + current_fname(fs->tsdb, fname, TSDB_FCURRENT_C); + } else if (fs->etype == TSDB_FEDIT_MERGE) { + current_fname(fs->tsdb, fname, TSDB_FCURRENT_M); + } else { + ASSERT(0); + } + + int32_t code; + int32_t lino; + if ((code = taosRemoveFile(fname))) { + TSDB_CHECK_CODE(code = TAOS_SYSTEM_ERROR(code), lino, _exit); + } + + code = apply_abort(fs); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + tsdbError("vgId:%d %s failed since %s", TD_VID(fs->tsdb->pVnode), __func__, tstrerror(code)); + } else { + tsdbInfo("vgId:%d %s success, etype:%d", TD_VID(fs->tsdb->pVnode), __func__, fs->etype); + } + return code; +} + +static int32_t tsdbFSScanAndFix(STFileSystem *fs) { + fs->neid = 0; + + // get max commit id + const STFileSet *fset; + TARRAY2_FOREACH(fs->fSetArr, fset) { fs->neid = TMAX(fs->neid, tsdbTFileSetMaxCid(fset)); } + + // TODO + return 0; +} + +static int32_t tsdbFSDupState(STFileSystem *fs) { + int32_t code; + + const TFileSetArray *src = fs->fSetArr; + TFileSetArray *dst = fs->fSetArrTmp; + + TARRAY2_CLEAR(dst, tsdbTFileSetClear); + + const STFileSet *fset1; + TARRAY2_FOREACH(src, fset1) { + STFileSet *fset2; + code = 
tsdbTFileSetInitDup(fs->tsdb, fset1, &fset2); + if (code) return code; + code = TARRAY2_APPEND(dst, fset2); + if (code) return code; + } + + return 0; +} + +static int32_t open_fs(STFileSystem *fs, int8_t rollback) { + int32_t code = 0; + int32_t lino = 0; + STsdb *pTsdb = fs->tsdb; + + char fCurrent[TSDB_FILENAME_LEN]; + char cCurrent[TSDB_FILENAME_LEN]; + char mCurrent[TSDB_FILENAME_LEN]; + + current_fname(pTsdb, fCurrent, TSDB_FCURRENT); + current_fname(pTsdb, cCurrent, TSDB_FCURRENT_C); + current_fname(pTsdb, mCurrent, TSDB_FCURRENT_M); + + if (taosCheckExistFile(fCurrent)) { // current.json exists + code = load_fs(pTsdb, fCurrent, fs->fSetArr); + TSDB_CHECK_CODE(code, lino, _exit); + + if (taosCheckExistFile(cCurrent)) { + // current.c.json exists + + fs->etype = TSDB_FEDIT_COMMIT; + if (rollback) { + code = abort_edit(fs); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + code = load_fs(pTsdb, cCurrent, fs->fSetArrTmp); + TSDB_CHECK_CODE(code, lino, _exit); + + code = commit_edit(fs); + TSDB_CHECK_CODE(code, lino, _exit); + } + } else if (taosCheckExistFile(mCurrent)) { + // current.m.json exists + fs->etype = TSDB_FEDIT_MERGE; + code = abort_edit(fs); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbFSDupState(fs); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbFSScanAndFix(fs); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + code = save_fs(fs->fSetArr, fCurrent); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbInfo("vgId:%d %s success", TD_VID(pTsdb->pVnode), __func__); + } + return 0; +} + +static int32_t close_file_system(STFileSystem *fs) { + TARRAY2_CLEAR(fs->fSetArr, tsdbTFileSetClear); + TARRAY2_CLEAR(fs->fSetArrTmp, tsdbTFileSetClear); + // TODO + return 0; +} + +static int32_t apply_edit(STFileSystem *pFS) { + int32_t code = 0; + ASSERTS(0, "TODO: Not implemented yet"); + return code; +} + 
+static int32_t fset_cmpr_fn(const struct STFileSet *pSet1, const struct STFileSet *pSet2) { + if (pSet1->fid < pSet2->fid) { + return -1; + } else if (pSet1->fid > pSet2->fid) { + return 1; + } + return 0; +} + +static int32_t edit_fs(STFileSystem *fs, const TFileOpArray *opArray) { + int32_t code = 0; + int32_t lino = 0; + + code = tsdbFSDupState(fs); + if (code) return code; + + TFileSetArray *fsetArray = fs->fSetArrTmp; + STFileSet *fset = NULL; + const STFileOp *op; + TARRAY2_FOREACH_PTR(opArray, op) { + if (!fset || fset->fid != op->fid) { + STFileSet tfset = {.fid = op->fid}; + fset = &tfset; + STFileSet **fsetPtr = TARRAY2_SEARCH(fsetArray, &fset, tsdbTFileSetCmprFn, TD_EQ); + fset = (fsetPtr == NULL) ? NULL : *fsetPtr; + + if (!fset) { + code = tsdbTFileSetInit(op->fid, &fset); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_SORT_INSERT(fsetArray, fset, tsdbTFileSetCmprFn); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + + code = tsdbTFileSetEdit(fs->tsdb, fset, op); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // remove empty file set + int32_t i = 0; + while (i < TARRAY2_SIZE(fsetArray)) { + fset = TARRAY2_GET(fsetArray, i); + if (tsdbTFileSetIsEmpty(fset)) { + TARRAY2_REMOVE(fsetArray, i, tsdbTFileSetClear); + } else { + i++; + } + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(fs->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbOpenFS(STsdb *pTsdb, STFileSystem **fs, int8_t rollback) { + int32_t code; + int32_t lino; + + code = tsdbCheckAndUpgradeFileSystem(pTsdb, rollback); + TSDB_CHECK_CODE(code, lino, _exit); + + code = create_fs(pTsdb, fs); + TSDB_CHECK_CODE(code, lino, _exit); + + code = open_fs(fs[0], rollback); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); + destroy_fs(fs); + } else { + tsdbInfo("vgId:%d %s success", TD_VID(pTsdb->pVnode), __func__); + } + return 0; +} + +static void 
tsdbDoWaitBgTask(STFileSystem *fs, STFSBgTask *task) {
+  // Block on task->done (fs->mutex is handed to the condition wait) while
+  // being counted in task->numWait. The waiter that brings numWait back to 0
+  // destroys the condition, runs task->free(task->arg) if set and frees task.
+  // NOTE(review): the wait is issued once, without a predicate loop; a wakeup
+  // that does not come from tsdbDoDoneBgTask would release the task early.
+  task->numWait++;
+  taosThreadCondWait(task->done, fs->mutex);
+  task->numWait--;
+
+  if (task->numWait == 0) {
+    taosThreadCondDestroy(task->done);
+    if (task->free) {
+      task->free(task->arg);
+    }
+    taosMemoryFree(task);
+  }
+}
+
+// Mark a task as finished: with waiters present only broadcast task->done and
+// leave the cleanup to them; without waiters destroy and free the task here.
+static void tsdbDoDoneBgTask(STFileSystem *fs, STFSBgTask *task) {
+  if (task->numWait > 0) {
+    taosThreadCondBroadcast(task->done);
+  } else {
+    taosThreadCondDestroy(task->done);
+    if (task->free) {
+      task->free(task->arg);
+    }
+    taosMemoryFree(task);
+  }
+}
+
+// Close the file system referenced by fs[0]; a NULL handle is a no-op.
+// Sets the stop flag, waits for the running background task (if any), then
+// releases the file sets and destroys the handle.
+int32_t tsdbCloseFS(STFileSystem **fs) {
+  if (fs[0] == NULL) return 0;
+
+  taosThreadMutexLock(fs[0]->mutex);
+  fs[0]->stop = true;
+
+  if (fs[0]->bgTaskRunning) {
+    tsdbDoWaitBgTask(fs[0], fs[0]->bgTaskRunning);
+  }
+  taosThreadMutexUnlock(fs[0]->mutex);
+
+  close_file_system(fs[0]);
+  destroy_fs(fs);
+  return 0;
+}
+
+// Return the next edit id (pre-incremented fs->neid).
+// NOTE(review): neid is modified while only the read side of tsdb->rwLock is
+// held; confirm callers are serialized by other means.
+int64_t tsdbFSAllocEid(STFileSystem *fs) {
+  taosThreadRwlockRdlock(&fs->tsdb->rwLock);
+  int64_t cid = ++fs->neid;
+  taosThreadRwlockUnlock(&fs->tsdb->rwLock);
+  return cid;
+}
+
+// Start an edit of kind etype.
+//
+// Waits on fs->canEdit, records etype, applies opArray to the temporary state
+// with edit_fs and saves fs->fSetArrTmp under the file name that belongs to
+// etype (TSDB_FCURRENT_C for commit, TSDB_FCURRENT_M for merge).
+// fs->canEdit is not posted here on either outcome.
+// Returns 0 or the first error code.
+int32_t tsdbFSEditBegin(STFileSystem *fs, const TFileOpArray *opArray, EFEditT etype) {
+  int32_t code = 0;
+  int32_t lino;
+  char current_t[TSDB_FILENAME_LEN];
+
+  switch (etype) {
+    case TSDB_FEDIT_COMMIT:
+      current_fname(fs->tsdb, current_t, TSDB_FCURRENT_C);
+      break;
+    case TSDB_FEDIT_MERGE:
+      current_fname(fs->tsdb, current_t, TSDB_FCURRENT_M);
+      break;
+    default:
+      ASSERT(0);
+  }
+
+  tsem_wait(&fs->canEdit);
+  fs->etype = etype;
+
+  // edit
+  code = edit_fs(fs, opArray);
+  TSDB_CHECK_CODE(code, lino, _exit);
+
+  // save fs
+  code = save_fs(fs->fSetArrTmp, current_t);
+  TSDB_CHECK_CODE(code, lino, _exit);
+
+_exit:
+  if (code) {
+    tsdbError("vgId:%d %s failed at line %d since %s, etype:%d", TD_VID(fs->tsdb->pVnode), __func__, lino,
+              tstrerror(code), etype);
+  } else {
+    tsdbInfo("vgId:%d %s done, etype:%d", TD_VID(fs->tsdb->pVnode), __func__, etype);
+  }
+  return code;
+}
+
+// Commit the edit in flight and, when the stt trigger is reached, schedule a
+// merge task. fs->canEdit is posted only when 0 is returned.
+int32_t tsdbFSEditCommit(STFileSystem *fs) {
+  
int32_t code = 0;
+  int32_t lino = 0;
+
+  // commit
+  code = commit_edit(fs);
+  TSDB_CHECK_CODE(code, lino, _exit);
+
+  // schedule merge
+  // (skipped when sttTrigger is 1; otherwise walk the file sets from the last
+  // one backwards and schedule a single merge for the first set whose first
+  // level is level 0 and holds at least sttTrigger files)
+  if (fs->tsdb->pVnode->config.sttTrigger != 1) {
+    STFileSet *fset;
+    TARRAY2_FOREACH_REVERSE(fs->fSetArr, fset) {
+      if (TARRAY2_SIZE(fset->lvlArr) == 0) continue;
+
+      SSttLvl *lvl = TARRAY2_FIRST(fset->lvlArr);
+      if (lvl->level != 0 || TARRAY2_SIZE(lvl->fobjArr) < fs->tsdb->pVnode->config.sttTrigger) continue;
+
+      code = tsdbFSScheduleBgTask(fs, TSDB_BG_TASK_MERGER, tsdbMerge, NULL, fs->tsdb, NULL);
+      TSDB_CHECK_CODE(code, lino, _exit);
+
+      break;
+    }
+  }
+
+_exit:
+  if (code) {
+    TSDB_ERROR_LOG(TD_VID(fs->tsdb->pVnode), lino, code);
+  } else {
+    tsdbDebug("vgId:%d %s done, etype:%d", TD_VID(fs->tsdb->pVnode), __func__, fs->etype);
+    // the edit slot is released on success only
+    tsem_post(&fs->canEdit);
+  }
+  return code;
+}
+
+// Abort the edit in flight and release the edit slot (fs->canEdit) regardless
+// of the result of abort_edit, whose code is returned.
+int32_t tsdbFSEditAbort(STFileSystem *fs) {
+  int32_t code = abort_edit(fs);
+  tsem_post(&fs->canEdit);
+  return code;
+}
+
+// Look up the file set with the given fid in fs->fSetArr.
+// fset[0] receives the entry or NULL when there is none; always returns 0.
+int32_t tsdbFSGetFSet(STFileSystem *fs, int32_t fid, STFileSet **fset) {
+  STFileSet tfset = {.fid = fid};
+  STFileSet *pset = &tfset;
+  STFileSet **fsetPtr = TARRAY2_SEARCH(fs->fSetArr, &pset, tsdbTFileSetCmprFn, TD_EQ);
+  fset[0] = (fsetPtr == NULL) ? 
NULL : fsetPtr[0];
+  return 0;
+}
+
+// Build a snapshot of fs->fSetArr made of duplicated file sets
+// (tsdbTFileSetInitDup), taken under the read side of tsdb->rwLock.
+//
+// On success fsetArr[0] owns a newly allocated array; release it with
+// tsdbFSDestroyCopySnapshot. On failure everything built so far is released,
+// fsetArr[0] is set to NULL and the error code is returned.
+int32_t tsdbFSCreateCopySnapshot(STFileSystem *fs, TFileSetArray **fsetArr) {
+  int32_t code = 0;
+  STFileSet *fset;
+  STFileSet *fset1;
+
+  fsetArr[0] = taosMemoryMalloc(sizeof(TFileSetArray));
+  // check the allocation result, not the out-parameter itself
+  if (fsetArr[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY;
+
+  TARRAY2_INIT(fsetArr[0]);
+
+  taosThreadRwlockRdlock(&fs->tsdb->rwLock);
+  TARRAY2_FOREACH(fs->fSetArr, fset) {
+    code = tsdbTFileSetInitDup(fs->tsdb, fset, &fset1);
+    if (code) break;
+
+    code = TARRAY2_APPEND(fsetArr[0], fset1);
+    if (code) {
+      // the duplicate never made it into the array, release it here
+      tsdbTFileSetClear(&fset1);
+      break;
+    }
+  }
+  taosThreadRwlockUnlock(&fs->tsdb->rwLock);
+
+  if (code) {
+    TARRAY2_DESTROY(fsetArr[0], tsdbTFileSetClear);
+    taosMemoryFree(fsetArr[0]);
+    fsetArr[0] = NULL;
+  }
+  return code;
+}
+
+// Release a snapshot created by tsdbFSCreateCopySnapshot; NULL is a no-op.
+int32_t tsdbFSDestroyCopySnapshot(TFileSetArray **fsetArr) {
+  if (fsetArr[0]) {
+    TARRAY2_DESTROY(fsetArr[0], tsdbTFileSetClear);
+    taosMemoryFree(fsetArr[0]);
+    fsetArr[0] = NULL;
+  }
+  return 0;
+}
+
+// Build a snapshot of fs->fSetArr whose file sets reference the existing file
+// objects (tsdbTFileSetInitRef), taken under the read side of tsdb->rwLock.
+//
+// On success fsetArr[0] owns a newly allocated array; release it with
+// tsdbFSDestroyRefSnapshot. On failure everything built so far is released,
+// fsetArr[0] is set to NULL and the error code is returned.
+int32_t tsdbFSCreateRefSnapshot(STFileSystem *fs, TFileSetArray **fsetArr) {
+  int32_t code = 0;
+  STFileSet *fset, *fset1;
+
+  fsetArr[0] = taosMemoryCalloc(1, sizeof(*fsetArr[0]));
+  if (fsetArr[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY;
+
+  taosThreadRwlockRdlock(&fs->tsdb->rwLock);
+  TARRAY2_FOREACH(fs->fSetArr, fset) {
+    code = tsdbTFileSetInitRef(fs->tsdb, fset, &fset1);
+    if (code) break;
+
+    code = TARRAY2_APPEND(fsetArr[0], fset1);
+    if (code) {
+      // the new file set never made it into the array, release it here
+      tsdbTFileSetClear(&fset1);
+      break;
+    }
+  }
+  taosThreadRwlockUnlock(&fs->tsdb->rwLock);
+
+  if (code) {
+    TARRAY2_DESTROY(fsetArr[0], tsdbTFileSetClear);
+    // free the array header as well, as the copy variant does
+    taosMemoryFree(fsetArr[0]);
+    fsetArr[0] = NULL;
+  }
+  return code;
+}
+
+// Release a snapshot created by tsdbFSCreateRefSnapshot; NULL is a no-op.
+int32_t tsdbFSDestroyRefSnapshot(TFileSetArray **fsetArr) {
+  if (fsetArr[0]) {
+    TARRAY2_DESTROY(fsetArr[0], tsdbTFileSetClear);
+    taosMemoryFreeClear(fsetArr[0]);
+    fsetArr[0] = NULL;
+  }
+  return 0;
+}
+
+// Display names used in logs, indexed by the task type (index 0 is unused).
+const char *gFSBgTaskName[] = {NULL, "MERGE", "RETENTION", "COMPACT"};
+
+// Worker entry point: runs fs->bgTaskRunning, finishes it and launches the
+// next queued task (or drains the queue when the file system is stopping).
+// arg is the STFileSystem.
+static int32_t tsdbFSRunBgTask(void *arg) {
+  STFileSystem *fs = (STFileSystem *)arg;
+
+  ASSERT(fs->bgTaskRunning != NULL);
+
+  
fs->bgTaskRunning->launchTime = taosGetTimestampMs();
+  // the task body runs without fs->mutex held
+  fs->bgTaskRunning->run(fs->bgTaskRunning->arg);
+  fs->bgTaskRunning->finishTime = taosGetTimestampMs();
+
+  tsdbDebug("vgId:%d bg task:%s task id:%" PRId64 " finished, schedule time:%" PRId64 " launch time:%" PRId64
+            " finish time:%" PRId64,
+            TD_VID(fs->tsdb->pVnode), gFSBgTaskName[fs->bgTaskRunning->type], fs->bgTaskRunning->taskid,
+            fs->bgTaskRunning->scheduleTime, fs->bgTaskRunning->launchTime, fs->bgTaskRunning->finishTime);
+
+  taosThreadMutexLock(fs->mutex);
+
+  // free last
+  tsdbDoDoneBgTask(fs, fs->bgTaskRunning);
+  fs->bgTaskRunning = NULL;
+
+  // schedule next
+  if (fs->bgTaskNum > 0) {
+    if (fs->stop) {
+      // stopping: unlink every queued task and finish it without running it
+      while (fs->bgTaskNum > 0) {
+        STFSBgTask *task = fs->bgTaskQueue->next;
+        task->prev->next = task->next;
+        task->next->prev = task->prev;
+        fs->bgTaskNum--;
+        tsdbDoDoneBgTask(fs, task);
+      }
+    } else {
+      // pop task from head
+      fs->bgTaskRunning = fs->bgTaskQueue->next;
+      fs->bgTaskRunning->prev->next = fs->bgTaskRunning->next;
+      fs->bgTaskRunning->next->prev = fs->bgTaskRunning->prev;
+      fs->bgTaskNum--;
+      vnodeScheduleTaskEx(1, tsdbFSRunBgTask, arg);
+    }
+  }
+
+  taosThreadMutexUnlock(fs->mutex);
+  return 0;
+}
+
+// Queue a background task; the caller must hold fs->mutex (see
+// tsdbFSScheduleBgTask).
+//
+// Nothing is scheduled, and 0 is still returned, when the file system is
+// stopping or when a task of the same type is already waiting in the queue;
+// *taskid is left untouched in those cases. Otherwise the task starts right
+// away if nothing is running or queued, else it is appended to the queue
+// tail, and *taskid (if given) receives its id.
+// Returns TSDB_CODE_OUT_OF_MEMORY if the task cannot be allocated.
+static int32_t tsdbFSScheduleBgTaskImpl(STFileSystem *fs, EFSBgTaskT type, int32_t (*run)(void *), void (*free)(void *),
+                                        void *arg, int64_t *taskid) {
+  if (fs->stop) {
+    return 0;  // TODO: use a better error code
+  }
+
+  // check if same task is on
+  // if (fs->bgTaskRunning && fs->bgTaskRunning->type == type) {
+  //   return 0;
+  // }
+
+  for (STFSBgTask *task = fs->bgTaskQueue->next; task != fs->bgTaskQueue; task = task->next) {
+    if (task->type == type) {
+      return 0;
+    }
+  }
+
+  // do schedule task
+  STFSBgTask *task = taosMemoryCalloc(1, sizeof(STFSBgTask));
+  if (task == NULL) return TSDB_CODE_OUT_OF_MEMORY;
+  taosThreadCondInit(task->done, NULL);
+
+  task->type = type;
+  task->run = run;
+  task->free = free;
+  task->arg = arg;
+  task->scheduleTime = taosGetTimestampMs();
+  
task->taskid = ++fs->taskid; + + if (fs->bgTaskRunning == NULL && fs->bgTaskNum == 0) { + // launch task directly + fs->bgTaskRunning = task; + vnodeScheduleTaskEx(1, tsdbFSRunBgTask, fs); + } else { + // add to the queue tail + fs->bgTaskNum++; + task->next = fs->bgTaskQueue; + task->prev = fs->bgTaskQueue->prev; + task->prev->next = task; + task->next->prev = task; + } + + if (taskid) *taskid = task->taskid; + return 0; +} + +int32_t tsdbFSScheduleBgTask(STFileSystem *fs, EFSBgTaskT type, int32_t (*run)(void *), void (*free)(void *), void *arg, + int64_t *taskid) { + taosThreadMutexLock(fs->mutex); + int32_t code = tsdbFSScheduleBgTaskImpl(fs, type, run, free, arg, taskid); + taosThreadMutexUnlock(fs->mutex); + return code; +} + +int32_t tsdbFSWaitBgTask(STFileSystem *fs, int64_t taskid) { + STFSBgTask *task = NULL; + + taosThreadMutexLock(fs->mutex); + + if (fs->bgTaskRunning && fs->bgTaskRunning->taskid == taskid) { + task = fs->bgTaskRunning; + } else { + for (STFSBgTask *taskt = fs->bgTaskQueue->next; taskt != fs->bgTaskQueue; taskt = taskt->next) { + if (taskt->taskid == taskid) { + task = taskt; + break; + } + } + } + + if (task) { + tsdbDoWaitBgTask(fs, task); + } + + taosThreadMutexUnlock(fs->mutex); + return 0; +} + +int32_t tsdbFSWaitAllBgTask(STFileSystem *fs) { + taosThreadMutexLock(fs->mutex); + + while (fs->bgTaskRunning) { + taosThreadCondWait(fs->bgTaskRunning->done, fs->mutex); + } + + taosThreadMutexUnlock(fs->mutex); + return 0; +} + +static int32_t tsdbFSDoDisableBgTask(STFileSystem *fs) { + fs->stop = true; + + if (fs->bgTaskRunning) { + tsdbDoWaitBgTask(fs, fs->bgTaskRunning); + } + return 0; +} + +int32_t tsdbFSDisableBgTask(STFileSystem *fs) { + taosThreadMutexLock(fs->mutex); + int32_t code = tsdbFSDoDisableBgTask(fs); + taosThreadMutexUnlock(fs->mutex); + return code; +} + +int32_t tsdbFSEnableBgTask(STFileSystem *fs) { + taosThreadMutexLock(fs->mutex); + fs->stop = false; + taosThreadMutexUnlock(fs->mutex); + return 0; +} diff --git 
a/source/dnode/vnode/src/tsdb/tsdbFS2.h b/source/dnode/vnode/src/tsdb/tsdbFS2.h
new file mode 100644
index 0000000000000000000000000000000000000000..e814ab2fffb2a49b76fa2642efd309db0ddd7f2b
--- /dev/null
+++ b/source/dnode/vnode/src/tsdb/tsdbFS2.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2019 TAOS Data, Inc.
+ *
+ * This program is free software: you can use, redistribute, and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3
+ * or later ("AGPL"), as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "tsdbFSet2.h"
+
+#ifndef _TSDB_FILE_SYSTEM_H
+#define _TSDB_FILE_SYSTEM_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Exposed Handle */
+typedef struct STFileSystem STFileSystem;
+typedef struct STFSBgTask STFSBgTask;
+// typedef TARRAY2(STFileSet *) TFileSetArray;
+
+// Kind of edit in flight; tsdbFSEditBegin uses it to pick the edit file name.
+typedef enum {
+  TSDB_FEDIT_COMMIT = 1,  //
+  TSDB_FEDIT_MERGE
+} EFEditT;
+
+// Background task types; the value indexes gFSBgTaskName.
+typedef enum {
+  TSDB_BG_TASK_MERGER = 1,
+  TSDB_BG_TASK_RETENTION,
+  TSDB_BG_TASK_COMPACT,
+} EFSBgTaskT;
+
+// Which "current" file a name is built for.
+typedef enum {
+  TSDB_FCURRENT = 1,
+  TSDB_FCURRENT_C,  // for commit
+  TSDB_FCURRENT_M,  // for merge
+} EFCurrentT;
+
+/* Exposed APIs */
+// open/close
+int32_t tsdbOpenFS(STsdb *pTsdb, STFileSystem **fs, int8_t rollback);
+int32_t tsdbCloseFS(STFileSystem **fs);
+// snapshot
+int32_t tsdbFSCreateCopySnapshot(STFileSystem *fs, TFileSetArray **fsetArr);
+int32_t tsdbFSDestroyCopySnapshot(TFileSetArray **fsetArr);
+int32_t tsdbFSCreateRefSnapshot(STFileSystem *fs, TFileSetArray **fsetArr);
+int32_t tsdbFSDestroyRefSnapshot(TFileSetArray **fsetArr);
+// txn
+int64_t tsdbFSAllocEid(STFileSystem *fs);
+int32_t tsdbFSEditBegin(STFileSystem *fs, const TFileOpArray 
*opArray, EFEditT etype);
+int32_t tsdbFSEditCommit(STFileSystem *fs);
+int32_t tsdbFSEditAbort(STFileSystem *fs);
+// background task
+int32_t tsdbFSScheduleBgTask(STFileSystem *fs, EFSBgTaskT type, int32_t (*run)(void *), void (*free)(void *), void *arg,
+                             int64_t *taskid);
+int32_t tsdbFSWaitBgTask(STFileSystem *fs, int64_t taskid);
+int32_t tsdbFSWaitAllBgTask(STFileSystem *fs);
+int32_t tsdbFSDisableBgTask(STFileSystem *fs);
+int32_t tsdbFSEnableBgTask(STFileSystem *fs);
+// other
+int32_t tsdbFSGetFSet(STFileSystem *fs, int32_t fid, STFileSet **fset);
+
+// One background task; tasks are linked into a circular doubly linked list
+// whose sentinel is STFileSystem.bgTaskQueue.
+struct STFSBgTask {
+  EFSBgTaskT type;
+  int32_t (*run)(void *arg);   // task body
+  void (*free)(void *arg);     // optional destructor for arg, may be NULL
+  void *arg;
+
+  TdThreadCond done[1];  // broadcast when the task finishes and has waiters
+  int32_t numWait;       // number of threads currently waiting on done
+
+  int64_t taskid;
+  int64_t scheduleTime;
+  int64_t launchTime;
+  int64_t finishTime;
+
+  struct STFSBgTask *prev;
+  struct STFSBgTask *next;
+};
+
+/* Exposed Structs */
+struct STFileSystem {
+  STsdb *tsdb;
+  tsem_t canEdit;  // taken by tsdbFSEditBegin, posted by commit (on success) / abort
+  int32_t state;
+  int64_t neid;    // last edit id handed out by tsdbFSAllocEid
+  EFEditT etype;   // kind of the edit in flight
+  TFileSetArray fSetArr[1];     // current state
+  TFileSetArray fSetArrTmp[1];  // state being edited
+
+  // background task queue
+  TdThreadMutex mutex[1];
+  bool stop;          // when set, no new task is scheduled
+  int64_t taskid;     // last task id handed out
+  int32_t bgTaskNum;  // number of queued (not running) tasks
+  STFSBgTask bgTaskQueue[1];  // list sentinel
+  STFSBgTask *bgTaskRunning;  // task being run, or NULL
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /*_TSDB_FILE_SYSTEM_H*/
diff --git a/source/dnode/vnode/src/tsdb/tsdbFSet2.c b/source/dnode/vnode/src/tsdb/tsdbFSet2.c
new file mode 100644
index 0000000000000000000000000000000000000000..7bc9743ecb726c9305572ba0ab0db184355bbc08
--- /dev/null
+++ b/source/dnode/vnode/src/tsdb/tsdbFSet2.c
@@ -0,0 +1,542 @@
+/*
+ * Copyright (c) 2019 TAOS Data, Inc.
+ *
+ * This program is free software: you can use, redistribute, and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3
+ * or later ("AGPL"), as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "tsdbFSet2.h"
+
+// Allocate an empty stt level with the given level number into lvl[0].
+// Returns TSDB_CODE_OUT_OF_MEMORY if the allocation fails.
+int32_t tsdbSttLvlInit(int32_t level, SSttLvl **lvl) {
+  if (!(lvl[0] = taosMemoryMalloc(sizeof(SSttLvl)))) return TSDB_CODE_OUT_OF_MEMORY;
+  lvl[0]->level = level;
+  TARRAY2_INIT(lvl[0]->fobjArr);
+  return 0;
+}
+
+// Array element destructor: drop one reference on the stored file object.
+static void tsdbSttLvlClearFObj(void *data) { tsdbTFileObjUnref(*(STFileObj **)data); }
+
+// Unref every file object of the level, free it and set lvl[0] to NULL.
+// A NULL level is a no-op.
+int32_t tsdbSttLvlClear(SSttLvl **lvl) {
+  if (lvl[0] != NULL) {
+    TARRAY2_DESTROY(lvl[0]->fobjArr, tsdbSttLvlClearFObj);
+    taosMemoryFree(lvl[0]);
+    lvl[0] = NULL;
+  }
+  return 0;
+}
+
+// Create lvl[0] as a copy of lvl1 with a new file object for each file.
+// On any failure the partially built level is cleared (lvl[0] == NULL) and
+// the error code is returned.
+static int32_t tsdbSttLvlInitEx(STsdb *pTsdb, const SSttLvl *lvl1, SSttLvl **lvl) {
+  int32_t code = tsdbSttLvlInit(lvl1->level, lvl);
+  if (code) return code;
+
+  const STFileObj *fobj1;
+  TARRAY2_FOREACH(lvl1->fobjArr, fobj1) {
+    STFileObj *fobj;
+    code = tsdbTFileObjInit(pTsdb, fobj1->f, &fobj);
+    if (code) {
+      tsdbSttLvlClear(lvl);
+      return code;
+    }
+
+    code = TARRAY2_APPEND(lvl[0]->fobjArr, fobj);
+    if (code) {
+      // fobj is not owned by the array yet, release it separately
+      tsdbTFileObjUnref(fobj);
+      tsdbSttLvlClear(lvl);
+      return code;
+    }
+  }
+  return 0;
+}
+
+// Create lvl[0] as a copy of lvl1 that shares lvl1's file objects, taking one
+// extra reference on each. On failure the partially built level is cleared
+// (lvl[0] == NULL) and the error code is returned.
+static int32_t tsdbSttLvlInitRef(STsdb *pTsdb, const SSttLvl *lvl1, SSttLvl **lvl) {
+  int32_t code = tsdbSttLvlInit(lvl1->level, lvl);
+  if (code) return code;
+
+  STFileObj *fobj1;
+  TARRAY2_FOREACH(lvl1->fobjArr, fobj1) {
+    tsdbTFileObjRef(fobj1);
+    code = TARRAY2_APPEND(lvl[0]->fobjArr, fobj1);
+    if (code) {
+      // give back the reference just taken and the level built so far
+      tsdbTFileObjUnref(fobj1);
+      tsdbSttLvlClear(lvl);
+      return code;
+    }
+  }
+  return 0;
+}
+
+// Array element destructor: hand the stored file object to tsdbTFileObjRemove.
+static void tsdbSttLvlRemoveFObj(void *data) { tsdbTFileObjRemove(*(STFileObj **)data); }
+// Remove every file object of the level, free it and set lvl[0] to NULL.
+static void tsdbSttLvlRemove(SSttLvl **lvl) {
+  TARRAY2_DESTROY(lvl[0]->fobjArr, tsdbSttLvlRemoveFObj);
+  taosMemoryFree(lvl[0]);
+  lvl[0] = NULL;
+}
+
+// Make the file list of lvl2 match lvl1 (both must have the same level
+// number) by walking the two arrays in parallel.
+static int32_t tsdbSttLvlApplyEdit(STsdb *pTsdb, const SSttLvl *lvl1, SSttLvl *lvl2) {
+  int32_t code = 0;
+
+  ASSERT(lvl1->level == 
lvl2->level);
+
+  // i1 walks the target state (lvl1), i2 the state being updated (lvl2)
+  int32_t i1 = 0, i2 = 0;
+  while (i1 < TARRAY2_SIZE(lvl1->fobjArr) || i2 < TARRAY2_SIZE(lvl2->fobjArr)) {
+    STFileObj *fobj1 = i1 < TARRAY2_SIZE(lvl1->fobjArr) ? TARRAY2_GET(lvl1->fobjArr, i1) : NULL;
+    STFileObj *fobj2 = i2 < TARRAY2_SIZE(lvl2->fobjArr) ? TARRAY2_GET(lvl2->fobjArr, i2) : NULL;
+
+    if (fobj1 && fobj2) {
+      if (fobj1->f->cid < fobj2->f->cid) {
+        // create a file obj
+        code = tsdbTFileObjInit(pTsdb, fobj1->f, &fobj2);
+        if (code) return code;
+        // Insert in cid order so the new object lands in front of the element
+        // at i2. Appending at the tail would break the ordering and the i2++
+        // below would step over an element that was never compared.
+        code = TARRAY2_SORT_INSERT(lvl2->fobjArr, fobj2, tsdbTFileObjCmpr);
+        if (code) return code;
+        i1++;
+        i2++;
+      } else if (fobj1->f->cid > fobj2->f->cid) {
+        // remove a file obj
+        TARRAY2_REMOVE(lvl2->fobjArr, i2, tsdbSttLvlRemoveFObj);
+      } else {
+        if (tsdbIsSameTFile(fobj1->f, fobj2->f)) {
+          if (tsdbIsTFileChanged(fobj1->f, fobj2->f)) {
+            fobj2->f[0] = fobj1->f[0];
+          }
+        } else {
+          // same cid but a different file: replace the object
+          TARRAY2_REMOVE(lvl2->fobjArr, i2, tsdbSttLvlRemoveFObj);
+          code = tsdbTFileObjInit(pTsdb, fobj1->f, &fobj2);
+          if (code) return code;
+          code = TARRAY2_SORT_INSERT(lvl2->fobjArr, fobj2, tsdbTFileObjCmpr);
+          if (code) return code;
+        }
+        i1++;
+        i2++;
+      }
+    } else if (fobj1) {
+      // create a file obj
+      code = tsdbTFileObjInit(pTsdb, fobj1->f, &fobj2);
+      if (code) return code;
+      code = TARRAY2_APPEND(lvl2->fobjArr, fobj2);
+      if (code) return code;
+      i1++;
+      i2++;
+    } else {
+      // remove a file obj
+      TARRAY2_REMOVE(lvl2->fobjArr, i2, tsdbSttLvlRemoveFObj);
+    }
+  }
+  return 0;
+}
+
+// Order two stt levels (passed as array elements) by level number.
+static int32_t tsdbSttLvlCmprFn(const SSttLvl **lvl1, const SSttLvl **lvl2) {
+  if (lvl1[0]->level < lvl2[0]->level) return -1;
+  if (lvl1[0]->level > lvl2[0]->level) return 1;
+  return 0;
+}
+
+// Serialize a level into json as {"level": <n>, "files": [ ... ]}.
+static int32_t tsdbSttLvlToJson(const SSttLvl *lvl, cJSON *json) {
+  if (cJSON_AddNumberToObject(json, "level", lvl->level) == NULL) {
+    return TSDB_CODE_OUT_OF_MEMORY;
+  }
+
+  cJSON *ajson = cJSON_AddArrayToObject(json, "files");
+  if (ajson == NULL) return TSDB_CODE_OUT_OF_MEMORY;
+  const STFileObj *fobj;
+  TARRAY2_FOREACH(lvl->fobjArr, fobj) {
+    cJSON *item = 
cJSON_CreateObject();
+    if (item == NULL) return TSDB_CODE_OUT_OF_MEMORY;
+    cJSON_AddItemToArray(ajson, item);
+
+    int32_t code = tsdbTFileToJson(fobj->f, item);
+    if (code) return code;
+  }
+
+  return 0;
+}
+
+// Build lvl[0] from a json object holding a numeric "level" and a "files"
+// array. Returns TSDB_CODE_FILE_CORRUPTED when either is missing or of the
+// wrong type; on any failure after the level was allocated it is cleared
+// again (lvl[0] == NULL).
+static int32_t tsdbJsonToSttLvl(STsdb *pTsdb, const cJSON *json, SSttLvl **lvl) {
+  const cJSON *item1, *item2;
+  int32_t level;
+
+  item1 = cJSON_GetObjectItem(json, "level");
+  if (cJSON_IsNumber(item1)) {
+    level = item1->valuedouble;
+  } else {
+    return TSDB_CODE_FILE_CORRUPTED;
+  }
+
+  int32_t code = tsdbSttLvlInit(level, lvl);
+  if (code) return code;
+
+  item1 = cJSON_GetObjectItem(json, "files");
+  if (!cJSON_IsArray(item1)) {
+    tsdbSttLvlClear(lvl);
+    return TSDB_CODE_FILE_CORRUPTED;
+  }
+
+  cJSON_ArrayForEach(item2, item1) {
+    STFile tf;
+    code = tsdbJsonToTFile(item2, TSDB_FTYPE_STT, &tf);
+    if (code) {
+      tsdbSttLvlClear(lvl);
+      return code;
+    }
+
+    STFileObj *fobj;
+    code = tsdbTFileObjInit(pTsdb, &tf, &fobj);
+    if (code) {
+      tsdbSttLvlClear(lvl);
+      return code;
+    }
+
+    code = TARRAY2_APPEND(lvl[0]->fobjArr, fobj);
+    if (code) {
+      // fobj is not owned by the array yet, release it separately
+      tsdbTFileObjUnref(fobj);
+      tsdbSttLvlClear(lvl);
+      return code;
+    }
+  }
+  return 0;
+}
+
+// Serialize a file set into json: "fid", one entry per present non-stt file
+// (written by tsdbTFileToJson directly into json) and the "stt lvl" array
+// with one object per level.
+int32_t tsdbTFileSetToJson(const STFileSet *fset, cJSON *json) {
+  int32_t code = 0;
+  cJSON *item1, *item2;
+
+  // fid
+  if (cJSON_AddNumberToObject(json, "fid", fset->fid) == NULL) {
+    return TSDB_CODE_OUT_OF_MEMORY;
+  }
+
+  for (int32_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) {
+    if (fset->farr[ftype] == NULL) continue;
+
+    code = tsdbTFileToJson(fset->farr[ftype]->f, json);
+    if (code) return code;
+  }
+
+  // each level
+  item1 = cJSON_AddArrayToObject(json, "stt lvl");
+  if (item1 == NULL) return TSDB_CODE_OUT_OF_MEMORY;
+  const SSttLvl *lvl;
+  TARRAY2_FOREACH(fset->lvlArr, lvl) {
+    item2 = cJSON_CreateObject();
+    if (!item2) return TSDB_CODE_OUT_OF_MEMORY;
+    cJSON_AddItemToArray(item1, item2);
+
+    code = tsdbSttLvlToJson(lvl, item2);
+    if (code) return code;
+  }
+
+  return 0;
+}
+
+// Build fset[0] from the json produced by tsdbTFileSetToJson.
+int32_t tsdbJsonToTFileSet(STsdb *pTsdb, const cJSON *json, STFileSet **fset) {
+  int32_t code;
+  const cJSON *item1, 
*item2; + int32_t fid; + STFile tf; + + // fid + item1 = cJSON_GetObjectItem(json, "fid"); + if (cJSON_IsNumber(item1)) { + fid = item1->valuedouble; + } else { + return TSDB_CODE_FILE_CORRUPTED; + } + + code = tsdbTFileSetInit(fid, fset); + if (code) return code; + + for (tsdb_ftype_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) { + code = tsdbJsonToTFile(json, ftype, &tf); + if (code == TSDB_CODE_NOT_FOUND) { + continue; + } else if (code) { + tsdbTFileSetClear(fset); + return code; + } else { + code = tsdbTFileObjInit(pTsdb, &tf, &(*fset)->farr[ftype]); + if (code) return code; + } + } + + // each level + item1 = cJSON_GetObjectItem(json, "stt lvl"); + if (cJSON_IsArray(item1)) { + cJSON_ArrayForEach(item2, item1) { + SSttLvl *lvl; + code = tsdbJsonToSttLvl(pTsdb, item2, &lvl); + if (code) { + tsdbTFileSetClear(fset); + return code; + } + + TARRAY2_APPEND((*fset)->lvlArr, lvl); + } + } else { + return TSDB_CODE_FILE_CORRUPTED; + } + + return 0; +} + +// NOTE: the api does not remove file, only do memory operation +int32_t tsdbTFileSetEdit(STsdb *pTsdb, STFileSet *fset, const STFileOp *op) { + int32_t code = 0; + + if (op->optype == TSDB_FOP_CREATE) { + // create a new file + STFileObj *fobj; + code = tsdbTFileObjInit(pTsdb, &op->nf, &fobj); + if (code) return code; + + if (fobj->f->type == TSDB_FTYPE_STT) { + SSttLvl *lvl = tsdbTFileSetGetSttLvl(fset, fobj->f->stt->level); + if (!lvl) { + code = tsdbSttLvlInit(fobj->f->stt->level, &lvl); + if (code) return code; + + code = TARRAY2_SORT_INSERT(fset->lvlArr, lvl, tsdbSttLvlCmprFn); + if (code) return code; + } + + code = TARRAY2_SORT_INSERT(lvl->fobjArr, fobj, tsdbTFileObjCmpr); + if (code) return code; + } else { + ASSERT(fset->farr[fobj->f->type] == NULL); + fset->farr[fobj->f->type] = fobj; + } + } else if (op->optype == TSDB_FOP_REMOVE) { + // delete a file + if (op->of.type == TSDB_FTYPE_STT) { + SSttLvl *lvl = tsdbTFileSetGetSttLvl(fset, op->of.stt->level); + ASSERT(lvl); + + STFileObj tfobj = 
{.f[0] = {.cid = op->of.cid}};
+      // search key: a stack object carrying only the cid to look for
+      STFileObj *tfobjp = &tfobj;
+      int32_t idx = TARRAY2_SEARCH_IDX(lvl->fobjArr, &tfobjp, tsdbTFileObjCmpr, TD_EQ);
+      ASSERT(idx >= 0);
+      TARRAY2_REMOVE(lvl->fobjArr, idx, tsdbSttLvlClearFObj);
+
+      if (TARRAY2_SIZE(lvl->fobjArr) == 0) {
+        // TODO: remove the stt level if no file exists anymore
+        // TARRAY2_REMOVE(&fset->lvlArr, lvl - fset->lvlArr.data, tsdbSttLvlClear);
+      }
+    } else {
+      ASSERT(tsdbIsSameTFile(&op->of, fset->farr[op->of.type]->f));
+      tsdbTFileObjUnref(fset->farr[op->of.type]);
+      fset->farr[op->of.type] = NULL;
+    }
+  } else {
+    // any other optype: overwrite the existing file info with op->nf
+    if (op->nf.type == TSDB_FTYPE_STT) {
+      SSttLvl *lvl = tsdbTFileSetGetSttLvl(fset, op->of.stt->level);
+      ASSERT(lvl);
+
+      STFileObj tfobj = {.f[0] = {.cid = op->of.cid}}, *tfobjp = &tfobj;
+      STFileObj **fobjPtr = TARRAY2_SEARCH(lvl->fobjArr, &tfobjp, tsdbTFileObjCmpr, TD_EQ);
+      tfobjp = (fobjPtr ? *fobjPtr : NULL);
+
+      ASSERT(tfobjp);
+
+      tfobjp->f[0] = op->nf;
+    } else {
+      fset->farr[op->nf.type]->f[0] = op->nf;
+    }
+  }
+
+  return 0;
+}
+
+// Make fset2 match fset1 (both must have the same fid).
+//
+// Non-stt files: unchanged files are kept (their info is refreshed when
+// tsdbIsTFileChanged reports a difference), files missing from fset1 are
+// handed to tsdbTFileObjRemove, new or replaced files get a new file object.
+// The stt levels are then reconciled level by level.
+int32_t tsdbTFileSetApplyEdit(STsdb *pTsdb, const STFileSet *fset1, STFileSet *fset2) {
+  int32_t code = 0;
+
+  ASSERT(fset1->fid == fset2->fid);
+
+  for (tsdb_ftype_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) {
+    if (!fset1->farr[ftype] && !fset2->farr[ftype]) continue;
+
+    STFileObj *fobj1 = fset1->farr[ftype];
+    STFileObj *fobj2 = fset2->farr[ftype];
+
+    if (fobj1 && fobj2) {
+      if (tsdbIsSameTFile(fobj1->f, fobj2->f)) {
+        if (tsdbIsTFileChanged(fobj1->f, fobj2->f)) {
+          fobj2->f[0] = fobj1->f[0];
+        }
+      } else {
+        tsdbTFileObjRemove(fobj2);
+        code = tsdbTFileObjInit(pTsdb, fobj1->f, &fset2->farr[ftype]);
+        if (code) return code;
+      }
+    } else if (fobj1) {
+      // create a new file
+      code = tsdbTFileObjInit(pTsdb, fobj1->f, &fset2->farr[ftype]);
+      if (code) return code;
+    } else {
+      // remove the file
+      tsdbTFileObjRemove(fobj2);
+      fset2->farr[ftype] = NULL;
+    }
+  }
+
+  // stt part
+  int32_t i1 = 0, i2 = 0;
+  while (i1 < TARRAY2_SIZE(fset1->lvlArr) || 
i2 < TARRAY2_SIZE(fset2->lvlArr)) {
+    // i1 walks the target levels (fset1), i2 the levels being updated (fset2)
+    SSttLvl *lvl1 = i1 < TARRAY2_SIZE(fset1->lvlArr) ? TARRAY2_GET(fset1->lvlArr, i1) : NULL;
+    SSttLvl *lvl2 = i2 < TARRAY2_SIZE(fset2->lvlArr) ? TARRAY2_GET(fset2->lvlArr, i2) : NULL;
+
+    if (lvl1 && lvl2) {
+      if (lvl1->level < lvl2->level) {
+        // add a new stt level
+        code = tsdbSttLvlInitEx(pTsdb, lvl1, &lvl2);
+        if (code) return code;
+        code = TARRAY2_SORT_INSERT(fset2->lvlArr, lvl2, tsdbSttLvlCmprFn);
+        if (code) return code;
+        i1++;
+        i2++;
+      } else if (lvl1->level > lvl2->level) {
+        // remove the stt level
+        TARRAY2_REMOVE(fset2->lvlArr, i2, tsdbSttLvlRemove);
+      } else {
+        // apply edit on stt level
+        code = tsdbSttLvlApplyEdit(pTsdb, lvl1, lvl2);
+        if (code) return code;
+        i1++;
+        i2++;
+      }
+    } else if (lvl1) {
+      // add a new stt level
+      code = tsdbSttLvlInitEx(pTsdb, lvl1, &lvl2);
+      if (code) return code;
+      code = TARRAY2_SORT_INSERT(fset2->lvlArr, lvl2, tsdbSttLvlCmprFn);
+      if (code) return code;
+      i1++;
+      i2++;
+    } else {
+      // remove the stt level
+      TARRAY2_REMOVE(fset2->lvlArr, i2, tsdbSttLvlRemove);
+    }
+  }
+
+  return 0;
+}
+
+// Allocate a zeroed file set with the given fid and an empty level array
+// into fset[0]. Returns TSDB_CODE_OUT_OF_MEMORY if the allocation fails.
+int32_t tsdbTFileSetInit(int32_t fid, STFileSet **fset) {
+  fset[0] = taosMemoryCalloc(1, sizeof(STFileSet));
+  if (fset[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY;
+
+  fset[0]->fid = fid;
+  TARRAY2_INIT(fset[0]->lvlArr);
+  return 0;
+}
+
+// Create fset[0] as a copy of fset1 with new file objects for every file
+// (non-stt files and, through tsdbSttLvlInitEx, every stt level).
+int32_t tsdbTFileSetInitDup(STsdb *pTsdb, const STFileSet *fset1, STFileSet **fset) {
+  int32_t code = tsdbTFileSetInit(fset1->fid, fset);
+  if (code) return code;
+
+  for (int32_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) {
+    if (fset1->farr[ftype] == NULL) continue;
+
+    code = tsdbTFileObjInit(pTsdb, fset1->farr[ftype]->f, &fset[0]->farr[ftype]);
+    if (code) {
+      tsdbTFileSetClear(fset);
+      return code;
+    }
+  }
+
+  const SSttLvl *lvl1;
+  TARRAY2_FOREACH(fset1->lvlArr, lvl1) {
+    SSttLvl *lvl;
+    code = tsdbSttLvlInitEx(pTsdb, lvl1, &lvl);
+    if (code) {
+      tsdbTFileSetClear(fset);
+      return code;
+    }
+
+    code = TARRAY2_APPEND(fset[0]->lvlArr, lvl);
+    
if (code) {
+      // lvl is not owned by the file set yet: release it and the half-built
+      // set, as the other failure paths of this function do
+      tsdbSttLvlClear(&lvl);
+      tsdbTFileSetClear(fset);
+      return code;
+    }
+  }
+
+  return 0;
+}
+
+// Create fset[0] as a copy of fset1 that shares fset1's file objects, taking
+// one extra reference on each (stt levels go through tsdbSttLvlInitRef).
+// On failure the half-built file set is cleared (fset[0] == NULL) and the
+// error code is returned.
+int32_t tsdbTFileSetInitRef(STsdb *pTsdb, const STFileSet *fset1, STFileSet **fset) {
+  int32_t code = tsdbTFileSetInit(fset1->fid, fset);
+  if (code) return code;
+
+  for (int32_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) {
+    if (fset1->farr[ftype] == NULL) continue;
+
+    tsdbTFileObjRef(fset1->farr[ftype]);
+    fset[0]->farr[ftype] = fset1->farr[ftype];
+  }
+
+  const SSttLvl *lvl1;
+  TARRAY2_FOREACH(fset1->lvlArr, lvl1) {
+    SSttLvl *lvl;
+    code = tsdbSttLvlInitRef(pTsdb, lvl1, &lvl);
+    if (code) {
+      tsdbTFileSetClear(fset);
+      return code;
+    }
+
+    code = TARRAY2_APPEND(fset[0]->lvlArr, lvl);
+    if (code) {
+      // lvl is not owned by the file set yet, release it separately
+      tsdbSttLvlClear(&lvl);
+      tsdbTFileSetClear(fset);
+      return code;
+    }
+  }
+
+  return 0;
+}
+
+// Release a file set in memory: unref every non-stt file object, clear every
+// stt level, free the set and set fset[0] to NULL. NULL is a no-op.
+int32_t tsdbTFileSetClear(STFileSet **fset) {
+  if (!fset[0]) return 0;
+
+  for (tsdb_ftype_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) {
+    if (fset[0]->farr[ftype] == NULL) continue;
+    tsdbTFileObjUnref(fset[0]->farr[ftype]);
+  }
+
+  TARRAY2_DESTROY(fset[0]->lvlArr, tsdbSttLvlClear);
+
+  taosMemoryFree(fset[0]);
+  fset[0] = NULL;
+
+  return 0;
+}
+
+// Like tsdbTFileSetClear, but every file object is handed to
+// tsdbTFileObjRemove instead of being unreferenced. NULL is a no-op.
+int32_t tsdbTFileSetRemove(STFileSet **fset) {
+  // same guard as tsdbTFileSetClear
+  if (!fset[0]) return 0;
+
+  for (tsdb_ftype_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) {
+    if (fset[0]->farr[ftype] == NULL) continue;
+    tsdbTFileObjRemove(fset[0]->farr[ftype]);
+  }
+
+  TARRAY2_DESTROY(fset[0]->lvlArr, tsdbSttLvlRemove);
+  taosMemoryFree(fset[0]);
+  fset[0] = NULL;
+  return 0;
+}
+
+// Return the stt level with the given level number, or NULL if the file set
+// has none.
+SSttLvl *tsdbTFileSetGetSttLvl(STFileSet *fset, int32_t level) {
+  SSttLvl sttLvl = {.level = level};
+  SSttLvl *lvl = &sttLvl;
+  SSttLvl **lvlPtr = TARRAY2_SEARCH(fset->lvlArr, &lvl, tsdbSttLvlCmprFn, TD_EQ);
+  return lvlPtr ? 
lvlPtr[0] : NULL; +} + +int32_t tsdbTFileSetCmprFn(const STFileSet **fset1, const STFileSet **fset2) { + if (fset1[0]->fid < fset2[0]->fid) return -1; + if (fset1[0]->fid > fset2[0]->fid) return 1; + return 0; +} + +int64_t tsdbTFileSetMaxCid(const STFileSet *fset) { + int64_t maxCid = 0; + for (tsdb_ftype_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) { + if (fset->farr[ftype] == NULL) continue; + maxCid = TMAX(maxCid, fset->farr[ftype]->f->cid); + } + const SSttLvl *lvl; + const STFileObj *fobj; + TARRAY2_FOREACH(fset->lvlArr, lvl) { + TARRAY2_FOREACH(lvl->fobjArr, fobj) { maxCid = TMAX(maxCid, fobj->f->cid); } + } + return maxCid; +} + +bool tsdbTFileSetIsEmpty(const STFileSet *fset) { + for (tsdb_ftype_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) { + if (fset->farr[ftype] != NULL) return false; + } + return TARRAY2_SIZE(fset->lvlArr) == 0; +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbFSet2.h b/source/dnode/vnode/src/tsdb/tsdbFSet2.h new file mode 100644 index 0000000000000000000000000000000000000000..d7b3c1fc8cb3478b8ad6fe85d94ec7a992dad0c8 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbFSet2.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */
+
+#include "tsdbFile2.h"
+
+#ifndef _TSDB_FILE_SET2_H
+#define _TSDB_FILE_SET2_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct STFileSet STFileSet;
+typedef struct STFileOp STFileOp;
+typedef struct SSttLvl SSttLvl;
+typedef TARRAY2(STFileObj *) TFileObjArray;
+typedef TARRAY2(SSttLvl *) TSttLvlArray;
+typedef TARRAY2(STFileOp) TFileOpArray;
+
+// Kind of a file operation, see tsdbTFileSetEdit.
+typedef enum {
+  TSDB_FOP_NONE = 0,
+  TSDB_FOP_CREATE,
+  TSDB_FOP_REMOVE,
+  TSDB_FOP_MODIFY,
+} tsdb_fop_t;
+
+// Compound literal for a file set that carries only a fid.
+#define TFILE_SET(fid_) \
+  (STFileSet) { .fid = (fid_) }
+
+// init/clear
+int32_t tsdbTFileSetInit(int32_t fid, STFileSet **fset);
+int32_t tsdbTFileSetInitDup(STsdb *pTsdb, const STFileSet *fset1, STFileSet **fset);
+int32_t tsdbTFileSetInitRef(STsdb *pTsdb, const STFileSet *fset1, STFileSet **fset);
+int32_t tsdbTFileSetClear(STFileSet **fset);
+int32_t tsdbTFileSetRemove(STFileSet **fset);
+// to/from json
+int32_t tsdbTFileSetToJson(const STFileSet *fset, cJSON *json);
+int32_t tsdbJsonToTFileSet(STsdb *pTsdb, const cJSON *json, STFileSet **fset);
+// cmpr
+int32_t tsdbTFileSetCmprFn(const STFileSet **fset1, const STFileSet **fset2);
+// edit
+int32_t tsdbTFileSetEdit(STsdb *pTsdb, STFileSet *fset, const STFileOp *op);
+int32_t tsdbTFileSetApplyEdit(STsdb *pTsdb, const STFileSet *fset1, STFileSet *fset);
+// max commit id
+int64_t tsdbTFileSetMaxCid(const STFileSet *fset);
+// get
+SSttLvl *tsdbTFileSetGetSttLvl(STFileSet *fset, int32_t level);
+// is empty
+bool tsdbTFileSetIsEmpty(const STFileSet *fset);
+
+// One file operation of an edit.
+struct STFileOp {
+  tsdb_fop_t optype;
+  int32_t fid;  // file set the operation applies to
+  STFile of;  // old file state
+  STFile nf;  // new file state
+};
+
+// One stt level: its level number and its file objects.
+struct SSttLvl {
+  int32_t level;
+  TFileObjArray fobjArr[1];
+};
+
+struct STFileSet {
+  int32_t fid;
+  STFileObj *farr[TSDB_FTYPE_MAX];  // file array
+  TSttLvlArray lvlArr[1];  // level array
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /*_TSDB_FILE_SET2_H*/
\ No newline at end of file
diff --git a/source/dnode/vnode/src/tsdb/tsdbFSetRW.c 
b/source/dnode/vnode/src/tsdb/tsdbFSetRW.c
new file mode 100644
index 0000000000000000000000000000000000000000..83ae8c24291542f179a53fe9d7e2215c8f4ca8ca
--- /dev/null
+++ b/source/dnode/vnode/src/tsdb/tsdbFSetRW.c
@@ -0,0 +1,295 @@
+/*
+ * Copyright (c) 2019 TAOS Data, Inc.
+ *
+ * This program is free software: you can use, redistribute, and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3
+ * or later ("AGPL"), as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "tsdbFSetRW.h"
+
+// SFSetWriter ==================================================
+struct SFSetWriter {
+  SFSetWriterConfig config[1];
+
+  // schema caches and scratch buffers, handed to both file writers
+  SSkmInfo skmTb[1];
+  SSkmInfo skmRow[1];
+  uint8_t *bufArr[10];
+
+  // table currently being written (uid == 0 is treated as "none")
+  struct {
+    TABLEID tbid[1];
+  } ctx[1];
+
+  // writer
+  SBlockData blockData[2];  // blockDataIdx selects the current one, the other is the previous one
+  int32_t blockDataIdx;
+  SDataFileWriter *dataWriter;
+  SSttFileWriter *sttWriter;
+};
+
+// Start writing data of table tbid: remember the table id, refresh the table
+// schema, select block buffer 0 and initialize both block buffers with that
+// schema. Returns 0 or the first error code.
+static int32_t tsdbFSetWriteTableDataBegin(SFSetWriter *writer, const TABLEID *tbid) {
+  int32_t code = 0;
+  int32_t lino = 0;
+
+  writer->ctx->tbid->suid = tbid->suid;
+  writer->ctx->tbid->uid = tbid->uid;
+
+  code = tsdbUpdateSkmTb(writer->config->tsdb, writer->ctx->tbid, writer->skmTb);
+  TSDB_CHECK_CODE(code, lino, _exit);
+
+  writer->blockDataIdx = 0;
+  for (int32_t i = 0; i < ARRAY_SIZE(writer->blockData); i++) {
+    code = tBlockDataInit(&writer->blockData[i], writer->ctx->tbid, writer->skmTb->pTSchema, NULL, 0);
+    TSDB_CHECK_CODE(code, lino, _exit);
+  }
+
+_exit:
+  if (code) {
+    TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code);
+  }
+  return code;
+}
+
+// Write out whatever is left in the two block buffers for the current table
+// and reset them. Does nothing when no table is being written (uid == 0).
+static int32_t tsdbFSetWriteTableDataEnd(SFSetWriter *writer) {
+  if (writer->ctx->tbid->uid == 0) 
return 0; + + int32_t code = 0; + int32_t lino = 0; + + int32_t cidx = writer->blockDataIdx; + int32_t pidx = ((cidx + 1) & 1); + int32_t numRow = ((writer->blockData[pidx].nRow + writer->blockData[cidx].nRow) >> 1); + + if (writer->blockData[pidx].nRow > 0 && numRow >= writer->config->minRow) { + ASSERT(writer->blockData[pidx].nRow == writer->config->maxRow); + + SRowInfo row = { + .suid = writer->ctx->tbid->suid, + .uid = writer->ctx->tbid->uid, + .row = tsdbRowFromBlockData(writer->blockData + pidx, 0), + }; + + for (int32_t i = 0; i < numRow; i++) { + row.row.iRow = i; + + code = tsdbDataFileWriteRow(writer->dataWriter, &row); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbDataFileFlush(writer->dataWriter); + TSDB_CHECK_CODE(code, lino, _exit); + + for (int32_t i = numRow; i < writer->blockData[pidx].nRow; i++) { + row.row.iRow = i; + code = tsdbDataFileWriteRow(writer->dataWriter, &row); + TSDB_CHECK_CODE(code, lino, _exit); + } + + row.row = tsdbRowFromBlockData(writer->blockData + cidx, 0); + for (int32_t i = 0; i < writer->blockData[cidx].nRow; i++) { + row.row.iRow = i; + code = tsdbDataFileWriteRow(writer->dataWriter, &row); + TSDB_CHECK_CODE(code, lino, _exit); + } + } else { + // pidx + if (writer->blockData[pidx].nRow > 0) { + code = tsdbDataFileWriteBlockData(writer->dataWriter, &writer->blockData[pidx]); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // cidx + if (writer->blockData[cidx].nRow < writer->config->minRow) { + code = tsdbSttFileWriteBlockData(writer->sttWriter, &writer->blockData[cidx]); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + code = tsdbDataFileWriteBlockData(writer->dataWriter, &writer->blockData[cidx]); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + + for (int32_t i = 0; i < ARRAY_SIZE(writer->blockData); i++) { + tBlockDataReset(&writer->blockData[i]); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t 
tsdbFSetWriterOpen(SFSetWriterConfig *config, SFSetWriter **writer) { + int32_t code = 0; + int32_t lino = 0; + + writer[0] = taosMemoryCalloc(1, sizeof(*writer[0])); + if (writer[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; + + writer[0]->config[0] = config[0]; + + // data writer + if (!config->toSttOnly) { + SDataFileWriterConfig dataWriterConfig = { + .tsdb = config->tsdb, + .cmprAlg = config->cmprAlg, + .maxRow = config->maxRow, + .szPage = config->szPage, + .fid = config->fid, + .cid = config->cid, + .did = config->did, + .compactVersion = config->compactVersion, + .skmTb = writer[0]->skmTb, + .skmRow = writer[0]->skmRow, + .bufArr = writer[0]->bufArr, + }; + for (int32_t ftype = 0; ftype < TSDB_FTYPE_MAX; ++ftype) { + dataWriterConfig.files[ftype].exist = config->files[ftype].exist; + dataWriterConfig.files[ftype].file = config->files[ftype].file; + } + + code = tsdbDataFileWriterOpen(&dataWriterConfig, &writer[0]->dataWriter); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // stt writer + SSttFileWriterConfig sttWriterConfig = { + .tsdb = config->tsdb, + .maxRow = config->maxRow, + .szPage = config->szPage, + .cmprAlg = config->cmprAlg, + .compactVersion = config->compactVersion, + .did = config->did, + .fid = config->fid, + .cid = config->cid, + .level = config->level, + .skmTb = writer[0]->skmTb, + .skmRow = writer[0]->skmRow, + .bufArr = writer[0]->bufArr, + }; + code = tsdbSttFileWriterOpen(&sttWriterConfig, &writer[0]->sttWriter); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbFSetWriterClose(SFSetWriter **writer, bool abort, TFileOpArray *fopArr) { + if (writer[0] == NULL) return 0; + + int32_t code = 0; + int32_t lino = 0; + + STsdb *tsdb = writer[0]->config->tsdb; + + // end + if (!writer[0]->config->toSttOnly) { + code = tsdbFSetWriteTableDataEnd(writer[0]); + TSDB_CHECK_CODE(code, lino, _exit); + + code = 
tsdbDataFileWriterClose(&writer[0]->dataWriter, abort, fopArr); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbSttFileWriterClose(&writer[0]->sttWriter, abort, fopArr); + TSDB_CHECK_CODE(code, lino, _exit); + + // free + for (int32_t i = 0; i < ARRAY_SIZE(writer[0]->blockData); i++) { + tBlockDataDestroy(&writer[0]->blockData[i]); + } + for (int32_t i = 0; i < ARRAY_SIZE(writer[0]->bufArr); i++) { + tFree(writer[0]->bufArr[i]); + } + tDestroyTSchema(writer[0]->skmRow->pTSchema); + tDestroyTSchema(writer[0]->skmTb->pTSchema); + taosMemoryFree(writer[0]); + writer[0] = NULL; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbFSetWriteRow(SFSetWriter *writer, SRowInfo *row) { + int32_t code = 0; + int32_t lino = 0; + + if (writer->config->toSttOnly) { + code = tsdbSttFileWriteRow(writer->sttWriter, row); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + if (writer->ctx->tbid->uid != row->uid) { + code = tsdbFSetWriteTableDataEnd(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbFSetWriteTableDataBegin(writer, (TABLEID *)row); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (row->row.type == TSDBROW_ROW_FMT) { + code = tsdbUpdateSkmRow(writer->config->tsdb, writer->ctx->tbid, TSDBROW_SVERSION(&row->row), writer->skmRow); + TSDB_CHECK_CODE(code, lino, _exit); + } + + TSDBKEY key = TSDBROW_KEY(&row->row); + if (key.version <= writer->config->compactVersion // + && writer->blockData[writer->blockDataIdx].nRow > 0 // + && key.ts == writer->blockData[writer->blockDataIdx].aTSKEY[writer->blockData[writer->blockDataIdx].nRow - 1]) { + code = tBlockDataUpdateRow(&writer->blockData[writer->blockDataIdx], &row->row, writer->skmRow->pTSchema); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + if (writer->blockData[writer->blockDataIdx].nRow >= writer->config->maxRow) { + int32_t idx = ((writer->blockDataIdx + 1) & 1); + if (writer->blockData[idx].nRow >= writer->config->maxRow) { + code = 
tsdbDataFileWriteBlockData(writer->dataWriter, &writer->blockData[idx]); + TSDB_CHECK_CODE(code, lino, _exit); + + tBlockDataClear(&writer->blockData[idx]); + } + writer->blockDataIdx = idx; + } + + code = + tBlockDataAppendRow(&writer->blockData[writer->blockDataIdx], &row->row, writer->skmRow->pTSchema, row->uid); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbFSetWriteTombRecord(SFSetWriter *writer, const STombRecord *tombRecord) { + int32_t code = 0; + int32_t lino = 0; + + if (writer->config->toSttOnly || tsdbSttFileWriterIsOpened(writer->sttWriter)) { + code = tsdbSttFileWriteTombRecord(writer->sttWriter, tombRecord); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + code = tsdbDataFileWriteTombRecord(writer->dataWriter, tombRecord); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbFSetRW.h b/source/dnode/vnode/src/tsdb/tsdbFSetRW.h new file mode 100644 index 0000000000000000000000000000000000000000..b5710407cfe40e28736c2949d3f3421e131c6624 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbFSetRW.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "tsdbDataFileRW.h" +#include "tsdbSttFileRW.h" + +#ifndef _TSDB_FSET_RW_H +#define _TSDB_FSET_RW_H + +#ifdef __cplusplus +extern "C" { +#endif + +// +typedef struct SFSetWriter SFSetWriter; +typedef struct { + STsdb *tsdb; + bool toSttOnly; + int64_t compactVersion; + int32_t minRow; + int32_t maxRow; + int32_t szPage; + int8_t cmprAlg; + int32_t fid; + int64_t cid; + SDiskID did; + int32_t level; + struct { + bool exist; + STFile file; + } files[TSDB_FTYPE_MAX]; +} SFSetWriterConfig; + +int32_t tsdbFSetWriterOpen(SFSetWriterConfig *config, SFSetWriter **writer); +int32_t tsdbFSetWriterClose(SFSetWriter **writer, bool abort, TFileOpArray *fopArr); +int32_t tsdbFSetWriteRow(SFSetWriter *writer, SRowInfo *row); +int32_t tsdbFSetWriteTombRecord(SFSetWriter *writer, const STombRecord *tombRecord); + +#ifdef __cplusplus +} +#endif + +#endif /*_TSDB_FSET_RW_H*/ \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbFile.c b/source/dnode/vnode/src/tsdb/tsdbFile.c index 9ff4b28779d29075e4bd939392d66c8b2797197b..62b37cd0a66a9a9d2f1515a17d1ce8f6f11cbdde 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFile.c +++ b/source/dnode/vnode/src/tsdb/tsdbFile.c @@ -112,7 +112,10 @@ static char* getFileNamePrefix(STsdb *pTsdb, SDiskID did, int32_t fid, uint64_t p += titoa(TD_VID(pTsdb->pVnode), 10, p); *(p++) = 'f'; - p += titoa(fid, 10, p); + if (fid < 0) { + *(p++) = '-'; + } + p += titoa((fid < 0) ? -fid : fid, 10, p); memcpy(p, "ver", 3); p += 3; diff --git a/source/dnode/vnode/src/tsdb/tsdbFile2.c b/source/dnode/vnode/src/tsdb/tsdbFile2.c new file mode 100644 index 0000000000000000000000000000000000000000..be021169cd890676e8a90bf0588d8617b97343f2 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbFile2.c @@ -0,0 +1,295 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. 
+ * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tsdbFile2.h" + +// to_json +static int32_t head_to_json(const STFile *file, cJSON *json); +static int32_t data_to_json(const STFile *file, cJSON *json); +static int32_t sma_to_json(const STFile *file, cJSON *json); +static int32_t tomb_to_json(const STFile *file, cJSON *json); +static int32_t stt_to_json(const STFile *file, cJSON *json); + +// from_json +static int32_t head_from_json(const cJSON *json, STFile *file); +static int32_t data_from_json(const cJSON *json, STFile *file); +static int32_t sma_from_json(const cJSON *json, STFile *file); +static int32_t tomb_from_json(const cJSON *json, STFile *file); +static int32_t stt_from_json(const cJSON *json, STFile *file); + +static const struct { + const char *suffix; + int32_t (*to_json)(const STFile *file, cJSON *json); + int32_t (*from_json)(const cJSON *json, STFile *file); +} g_tfile_info[] = { + [TSDB_FTYPE_HEAD] = {"head", head_to_json, head_from_json}, + [TSDB_FTYPE_DATA] = {"data", data_to_json, data_from_json}, + [TSDB_FTYPE_SMA] = {"sma", sma_to_json, sma_from_json}, + [TSDB_FTYPE_TOMB] = {"tomb", tomb_to_json, tomb_from_json}, + [TSDB_FTYPE_STT] = {"stt", stt_to_json, stt_from_json}, +}; + +static void remove_file(const char *fname) { + taosRemoveFile(fname); + tsdbInfo("file:%s is removed", fname); +} + +static int32_t tfile_to_json(const STFile *file, cJSON *json) { + /* did.level */ + if (cJSON_AddNumberToObject(json, "did.level", file->did.level) == 
NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + /* did.id */ + if (cJSON_AddNumberToObject(json, "did.id", file->did.id) == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + /* fid */ + if (cJSON_AddNumberToObject(json, "fid", file->fid) == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + /* cid */ + if (cJSON_AddNumberToObject(json, "cid", file->cid) == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + /* size */ + if (cJSON_AddNumberToObject(json, "size", file->size) == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + return 0; +} + +static int32_t tfile_from_json(const cJSON *json, STFile *file) { + const cJSON *item; + + /* did.level */ + item = cJSON_GetObjectItem(json, "did.level"); + if (cJSON_IsNumber(item)) { + file->did.level = item->valuedouble; + } else { + return TSDB_CODE_FILE_CORRUPTED; + } + + /* did.id */ + item = cJSON_GetObjectItem(json, "did.id"); + if (cJSON_IsNumber(item)) { + file->did.id = item->valuedouble; + } else { + return TSDB_CODE_FILE_CORRUPTED; + } + + /* fid */ + item = cJSON_GetObjectItem(json, "fid"); + if (cJSON_IsNumber(item)) { + file->fid = item->valuedouble; + } else { + return TSDB_CODE_FILE_CORRUPTED; + } + + /* cid */ + item = cJSON_GetObjectItem(json, "cid"); + if (cJSON_IsNumber(item)) { + file->cid = item->valuedouble; + } else { + return TSDB_CODE_FILE_CORRUPTED; + } + + /* size */ + item = cJSON_GetObjectItem(json, "size"); + if (cJSON_IsNumber(item)) { + file->size = item->valuedouble; + } else { + return TSDB_CODE_FILE_CORRUPTED; + } + + return 0; +} + +static int32_t head_to_json(const STFile *file, cJSON *json) { return tfile_to_json(file, json); } +static int32_t data_to_json(const STFile *file, cJSON *json) { return tfile_to_json(file, json); } +static int32_t sma_to_json(const STFile *file, cJSON *json) { return tfile_to_json(file, json); } +static int32_t tomb_to_json(const STFile *file, cJSON *json) { return tfile_to_json(file, json); } +static int32_t stt_to_json(const STFile *file, cJSON *json) { + 
int32_t code = tfile_to_json(file, json); + if (code) return code; + + /* lvl */ + if (cJSON_AddNumberToObject(json, "level", file->stt->level) == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + return 0; +} + +static int32_t head_from_json(const cJSON *json, STFile *file) { return tfile_from_json(json, file); } +static int32_t data_from_json(const cJSON *json, STFile *file) { return tfile_from_json(json, file); } +static int32_t sma_from_json(const cJSON *json, STFile *file) { return tfile_from_json(json, file); } +static int32_t tomb_from_json(const cJSON *json, STFile *file) { return tfile_from_json(json, file); } +static int32_t stt_from_json(const cJSON *json, STFile *file) { + int32_t code = tfile_from_json(json, file); + if (code) return code; + + const cJSON *item; + + /* lvl */ + item = cJSON_GetObjectItem(json, "level"); + if (cJSON_IsNumber(item)) { + file->stt->level = item->valuedouble; + } else { + return TSDB_CODE_FILE_CORRUPTED; + } + + return 0; +} + +int32_t tsdbTFileToJson(const STFile *file, cJSON *json) { + if (file->type == TSDB_FTYPE_STT) { + return g_tfile_info[file->type].to_json(file, json); + } else { + cJSON *item = cJSON_AddObjectToObject(json, g_tfile_info[file->type].suffix); + if (item == NULL) return TSDB_CODE_OUT_OF_MEMORY; + return g_tfile_info[file->type].to_json(file, item); + } +} + +int32_t tsdbJsonToTFile(const cJSON *json, tsdb_ftype_t ftype, STFile *f) { + f[0] = (STFile){.type = ftype}; + + if (ftype == TSDB_FTYPE_STT) { + int32_t code = g_tfile_info[ftype].from_json(json, f); + if (code) return code; + } else { + const cJSON *item = cJSON_GetObjectItem(json, g_tfile_info[ftype].suffix); + if (cJSON_IsObject(item)) { + int32_t code = g_tfile_info[ftype].from_json(item, f); + if (code) return code; + } else { + return TSDB_CODE_NOT_FOUND; + } + } + + return 0; +} + +int32_t tsdbTFileObjInit(STsdb *pTsdb, const STFile *f, STFileObj **fobj) { + fobj[0] = taosMemoryMalloc(sizeof(*fobj[0])); + if (!fobj[0]) return 
TSDB_CODE_OUT_OF_MEMORY; + + taosThreadMutexInit(&fobj[0]->mutex, NULL); + fobj[0]->f[0] = f[0]; + fobj[0]->state = TSDB_FSTATE_LIVE; + fobj[0]->ref = 1; + tsdbTFileName(pTsdb, f, fobj[0]->fname); + return 0; +} + +// Takes an additional reference on a live file object. Always returns 0. +int32_t tsdbTFileObjRef(STFileObj *fobj) { + int32_t nRef; + taosThreadMutexLock(&fobj->mutex); + ASSERT(fobj->ref > 0 && fobj->state == TSDB_FSTATE_LIVE); + nRef = ++fobj->ref; + taosThreadMutexUnlock(&fobj->mutex); + tsdbTrace("ref file %s, fobj:%p ref %d", fobj->fname, fobj, nRef); + return 0; +} + +// Drops one reference. When the count reaches zero the object is freed, and the underlying file +// is removed first if the object was marked TSDB_FSTATE_DEAD. Always returns 0. +int32_t tsdbTFileObjUnref(STFileObj *fobj) { + taosThreadMutexLock(&fobj->mutex); + int32_t nRef = --fobj->ref; + taosThreadMutexUnlock(&fobj->mutex); + ASSERT(nRef >= 0); + tsdbTrace("unref file %s, fobj:%p ref %d", fobj->fname, fobj, nRef); + if (nRef == 0) { + if (fobj->state == TSDB_FSTATE_DEAD) { + remove_file(fobj->fname); + } + // Release the mutex set up by taosThreadMutexInit in tsdbTFileObjInit before freeing its storage. + // NOTE(review): assumes taosThreadMutexDestroy is declared next to taosThreadMutexInit; confirm. + taosThreadMutexDestroy(&fobj->mutex); + taosMemoryFree(fobj); + } + + return 0; +} + +// Marks the object dead and drops one reference. If that was the last reference the file is +// removed and the object freed right away; otherwise the final tsdbTFileObjUnref does it. +// Always returns 0. +int32_t tsdbTFileObjRemove(STFileObj *fobj) { + taosThreadMutexLock(&fobj->mutex); + ASSERT(fobj->state == TSDB_FSTATE_LIVE && fobj->ref > 0); + fobj->state = TSDB_FSTATE_DEAD; + int32_t nRef = --fobj->ref; + taosThreadMutexUnlock(&fobj->mutex); + tsdbTrace("remove unref file %s, fobj:%p ref %d", fobj->fname, fobj, nRef); + if (nRef == 0) { + remove_file(fobj->fname); + // Same teardown as tsdbTFileObjUnref: destroy the mutex before the memory goes away. + taosThreadMutexDestroy(&fobj->mutex); + taosMemoryFree(fobj); + } + return 0; +} + +// Formats the file name of f into fname, writing at most TSDB_FILENAME_LEN bytes. The path returned +// by tfsGetDiskPath is prepended when pVnode->pTfs is non-NULL. Always returns 0. +int32_t tsdbTFileName(STsdb *pTsdb, const STFile *f, char fname[]) { + SVnode *pVnode = pTsdb->pVnode; + STfs *pTfs = pVnode->pTfs; + + if (pTfs) { + snprintf(fname, // + TSDB_FILENAME_LEN, // + "%s%s%s%sv%df%dver%" PRId64 ".%s", // + tfsGetDiskPath(pTfs, f->did), // + TD_DIRSEP, // + pTsdb->path, // + TD_DIRSEP, // + TD_VID(pVnode), // + f->fid, // + f->cid, // + g_tfile_info[f->type].suffix); + } else { + snprintf(fname, // + TSDB_FILENAME_LEN, // + "%s%sv%df%dver%" PRId64 ".%s", // + pTsdb->path, // + TD_DIRSEP, // + TD_VID(pVnode), // + f->fid, // + f->cid, // + g_tfile_info[f->type].suffix); + } + return 0; +} + +bool tsdbIsSameTFile(const STFile *f1, const STFile 
*f2) { + if (f1->type != f2->type) return false; + if (f1->did.level != f2->did.level) return false; + if (f1->did.id != f2->did.id) return false; + if (f1->fid != f2->fid) return false; + if (f1->cid != f2->cid) return false; + return true; +} + +bool tsdbIsTFileChanged(const STFile *f1, const STFile *f2) { + if (f1->size != f2->size) return true; + // if (f1->type == TSDB_FTYPE_STT && f1->stt->nseg != f2->stt->nseg) return true; + return false; +} + +int32_t tsdbTFileObjCmpr(const STFileObj **fobj1, const STFileObj **fobj2) { + if (fobj1[0]->f->cid < fobj2[0]->f->cid) { + return -1; + } else if (fobj1[0]->f->cid > fobj2[0]->f->cid) { + return 1; + } else { + return 0; + } +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbFile2.h b/source/dnode/vnode/src/tsdb/tsdbFile2.h new file mode 100644 index 0000000000000000000000000000000000000000..11d08e45e667ddea6f8150239b472c86426f841a --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbFile2.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "tsdbDef.h" + +#ifndef _TSDB_FILE_H +#define _TSDB_FILE_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct STFile STFile; +typedef struct STFileObj STFileObj; + +typedef enum { + TSDB_FTYPE_HEAD = 0, // .head + TSDB_FTYPE_DATA, // .data + TSDB_FTYPE_SMA, // .sma + TSDB_FTYPE_TOMB, // .tomb + TSDB_FTYPE_STT = TSDB_FTYPE_TOMB + 2, // .stt +} tsdb_ftype_t; + +enum { + TSDB_FSTATE_LIVE = 1, + TSDB_FSTATE_DEAD, +}; + +#define TSDB_FTYPE_MIN TSDB_FTYPE_HEAD +#define TSDB_FTYPE_MAX (TSDB_FTYPE_TOMB + 1) + +// STFile +int32_t tsdbTFileToJson(const STFile *f, cJSON *json); +int32_t tsdbJsonToTFile(const cJSON *json, tsdb_ftype_t ftype, STFile *f); +int32_t tsdbTFileName(STsdb *pTsdb, const STFile *f, char fname[]); +bool tsdbIsSameTFile(const STFile *f1, const STFile *f2); +bool tsdbIsTFileChanged(const STFile *f1, const STFile *f2); + +// STFileObj +int32_t tsdbTFileObjInit(STsdb *pTsdb, const STFile *f, STFileObj **fobj); +int32_t tsdbTFileObjRef(STFileObj *fobj); +int32_t tsdbTFileObjUnref(STFileObj *fobj); +int32_t tsdbTFileObjRemove(STFileObj *fobj); +int32_t tsdbTFileObjCmpr(const STFileObj **fobj1, const STFileObj **fobj2); + +struct STFile { + tsdb_ftype_t type; + SDiskID did; // disk id + int32_t fid; // file id + int64_t cid; // commit id + int64_t size; + union { + struct { + int32_t level; + } stt[1]; + }; +}; + +struct STFileObj { + TdThreadMutex mutex; + STFile f[1]; + int32_t state; + int32_t ref; + char fname[TSDB_FILENAME_LEN]; +}; + +#ifdef __cplusplus +} +#endif + +#endif /*_TSDB_FILE_H*/ \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbIter.c b/source/dnode/vnode/src/tsdb/tsdbIter.c new file mode 100644 index 0000000000000000000000000000000000000000..9780cc6be63661869576bdfe7f1af544c59f1d59 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbIter.c @@ -0,0 +1,780 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. 
+ * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tsdbIter.h" + +// STsdbIter ================ +struct STsdbIter { + EIterType type; + bool noMoreData; + bool filterByVersion; + int64_t range[2]; + union { + SRowInfo row[1]; + STombRecord record[1]; + }; + SRBTreeNode node[1]; + union { + struct { + SSttFileReader *reader; + const TSttBlkArray *sttBlkArray; + int32_t sttBlkArrayIdx; + SBlockData blockData[1]; + int32_t blockDataIdx; + } sttData[1]; + struct { + SDataFileReader *reader; + const TBrinBlkArray *brinBlkArray; + int32_t brinBlkArrayIdx; + SBrinBlock brinBlock[1]; + int32_t brinBlockIdx; + SBlockData blockData[1]; + int32_t blockDataIdx; + } dataData[1]; + struct { + SMemTable *memt; + TSDBKEY from[1]; + SRBTreeIter iter[1]; + STbData *tbData; + STbDataIter tbIter[1]; + } memtData[1]; + struct { + SSttFileReader *reader; + const TTombBlkArray *tombBlkArray; + int32_t tombBlkArrayIdx; + STombBlock tombBlock[1]; + int32_t tombBlockIdx; + } sttTomb[1]; + struct { + SDataFileReader *reader; + const TTombBlkArray *tombBlkArray; + int32_t tombBlkArrayIdx; + STombBlock tombBlock[1]; + int32_t tombBlockIdx; + } dataTomb[1]; + struct { + SMemTable *memt; + SRBTreeIter rbtIter[1]; + STbData *tbData; + SDelData *delData; + } memtTomb[1]; + }; +}; + +static int32_t tsdbSttIterNext(STsdbIter *iter, const TABLEID *tbid) { + while (!iter->noMoreData) { + for (; iter->sttData->blockDataIdx < iter->sttData->blockData->nRow; iter->sttData->blockDataIdx++) { + int64_t 
version = iter->sttData->blockData->aVersion[iter->sttData->blockDataIdx]; + + if (iter->filterByVersion && (version < iter->range[0] || version > iter->range[1])) { + continue; + } + + iter->row->suid = iter->sttData->blockData->suid; + iter->row->uid = iter->sttData->blockData->uid ? iter->sttData->blockData->uid + : iter->sttData->blockData->aUid[iter->sttData->blockDataIdx]; + + if (tbid && iter->row->suid == tbid->suid && iter->row->uid == tbid->uid) { + continue; + } + + iter->row->row = tsdbRowFromBlockData(iter->sttData->blockData, iter->sttData->blockDataIdx); + iter->sttData->blockDataIdx++; + goto _exit; + } + + if (iter->sttData->sttBlkArrayIdx >= TARRAY2_SIZE(iter->sttData->sttBlkArray)) { + iter->noMoreData = true; + break; + } + + for (; iter->sttData->sttBlkArrayIdx < TARRAY2_SIZE(iter->sttData->sttBlkArray); iter->sttData->sttBlkArrayIdx++) { + const SSttBlk *sttBlk = TARRAY2_GET_PTR(iter->sttData->sttBlkArray, iter->sttData->sttBlkArrayIdx); + + if (iter->filterByVersion && (sttBlk->maxVer < iter->range[0] || sttBlk->minVer > iter->range[1])) { + continue; + } + + if (tbid && tbid->suid == sttBlk->suid && tbid->uid == sttBlk->minUid && tbid->uid == sttBlk->maxUid) { + continue; + } + + int32_t code = tsdbSttFileReadBlockData(iter->sttData->reader, sttBlk, iter->sttData->blockData); + if (code) return code; + + iter->sttData->blockDataIdx = 0; + iter->sttData->sttBlkArrayIdx++; + break; + } + } + +_exit: + return 0; +} + +static int32_t tsdbDataIterNext(STsdbIter *iter, const TABLEID *tbid) { + int32_t code; + + while (!iter->noMoreData) { + for (;;) { + // SBlockData + for (; iter->dataData->blockDataIdx < iter->dataData->blockData->nRow; iter->dataData->blockDataIdx++) { + int64_t version = iter->dataData->blockData->aVersion[iter->dataData->blockDataIdx]; + if (iter->filterByVersion && (version < iter->range[0] || version > iter->range[1])) { + continue; + } + + if (tbid && tbid->suid == iter->dataData->blockData->suid && tbid->uid == 
iter->dataData->blockData->uid) { + iter->dataData->blockDataIdx = iter->dataData->blockData->nRow; + break; + } + + iter->row->row = tsdbRowFromBlockData(iter->dataData->blockData, iter->dataData->blockDataIdx); + iter->dataData->blockDataIdx++; + goto _exit; + } + + // SBrinBlock + if (iter->dataData->brinBlockIdx >= BRIN_BLOCK_SIZE(iter->dataData->brinBlock)) { + break; + } + + for (; iter->dataData->brinBlockIdx < BRIN_BLOCK_SIZE(iter->dataData->brinBlock); + iter->dataData->brinBlockIdx++) { + SBrinRecord record[1]; + tBrinBlockGet(iter->dataData->brinBlock, iter->dataData->brinBlockIdx, record); + + if (iter->filterByVersion && (record->maxVer < iter->range[0] || record->minVer > iter->range[1])) { + continue; + } + + if (tbid && tbid->suid == record->suid && tbid->uid == record->uid) { + continue; + } + + iter->row->suid = record->suid; + iter->row->uid = record->uid; + + code = tsdbDataFileReadBlockData(iter->dataData->reader, record, iter->dataData->blockData); + if (code) return code; + + iter->dataData->blockDataIdx = 0; + iter->dataData->brinBlockIdx++; + break; + } + } + + if (iter->dataData->brinBlkArrayIdx >= TARRAY2_SIZE(iter->dataData->brinBlkArray)) { + iter->noMoreData = true; + break; + } + + for (; iter->dataData->brinBlkArrayIdx < TARRAY2_SIZE(iter->dataData->brinBlkArray); + iter->dataData->brinBlkArrayIdx++) { + const SBrinBlk *brinBlk = TARRAY2_GET_PTR(iter->dataData->brinBlkArray, iter->dataData->brinBlkArrayIdx); + + if (iter->filterByVersion && (brinBlk->maxVer < iter->range[0] || brinBlk->minVer > iter->range[1])) { + continue; + } + + if (tbid && tbid->uid == brinBlk->minTbid.uid && tbid->uid == brinBlk->maxTbid.uid) { + continue; + } + + code = tsdbDataFileReadBrinBlock(iter->dataData->reader, brinBlk, iter->dataData->brinBlock); + if (code) return code; + + iter->dataData->brinBlockIdx = 0; + iter->dataData->brinBlkArrayIdx++; + break; + } + } + +_exit: + return 0; +} + +static int32_t tsdbMemTableIterNext(STsdbIter *iter, const 
TABLEID *tbid) { + // Moves to the next memtable row and stores it in iter->row. Rows of table tbid (when non-NULL) + // and, with filterByVersion set, rows whose version is outside [range[0], range[1]] are skipped. + // Sets noMoreData once the table tree is exhausted; always returns 0. + SRBTreeNode *node; + + while (!iter->noMoreData) { + for (TSDBROW *row; iter->memtData->tbData && (row = tsdbTbDataIterGet(iter->memtData->tbIter));) { + if (tbid && tbid->suid == iter->memtData->tbData->suid && tbid->uid == iter->memtData->tbData->uid) { + iter->memtData->tbData = NULL; + break; + } + + if (iter->filterByVersion) { + int64_t version = TSDBROW_VERSION(row); + if (version < iter->range[0] || version > iter->range[1]) { + // The loop has no increment clause and the accept path below advances tbIter explicitly, + // so step past the filtered row here; a bare continue would leave tbIter on the same row. + tsdbTbDataIterNext(iter->memtData->tbIter); + continue; + } + } + + iter->row->row = row[0]; + + tsdbTbDataIterNext(iter->memtData->tbIter); + goto _exit; + } + + for (;;) { + node = tRBTreeIterNext(iter->memtData->iter); + if (!node) { + iter->noMoreData = true; + goto _exit; + } + + iter->memtData->tbData = TCONTAINER_OF(node, STbData, rbtn); + if (tbid && tbid->suid == iter->memtData->tbData->suid && tbid->uid == iter->memtData->tbData->uid) { + continue; + } else { + iter->row->suid = iter->memtData->tbData->suid; + iter->row->uid = iter->memtData->tbData->uid; + tsdbTbDataIterOpen(iter->memtData->tbData, iter->memtData->from, 0, iter->memtData->tbIter); + break; + } + } + } + +_exit: + return 0; +} + +static int32_t tsdbDataTombIterNext(STsdbIter *iter, const TABLEID *tbid) { + while (!iter->noMoreData) { + for (; iter->dataTomb->tombBlockIdx < TOMB_BLOCK_SIZE(iter->dataTomb->tombBlock); iter->dataTomb->tombBlockIdx++) { + iter->record->suid = TARRAY2_GET(iter->dataTomb->tombBlock->suid, iter->dataTomb->tombBlockIdx); + iter->record->uid = TARRAY2_GET(iter->dataTomb->tombBlock->uid, iter->dataTomb->tombBlockIdx); + iter->record->version = TARRAY2_GET(iter->dataTomb->tombBlock->version, iter->dataTomb->tombBlockIdx); + + if (iter->filterByVersion && (iter->record->version < iter->range[0] || iter->record->version > iter->range[1])) { + continue; + } + + if (tbid && iter->record->suid == tbid->suid && iter->record->uid == tbid->uid) { + continue; + } + + iter->record->skey = TARRAY2_GET(iter->dataTomb->tombBlock->skey, iter->dataTomb->tombBlockIdx); + 
iter->record->ekey = TARRAY2_GET(iter->dataTomb->tombBlock->ekey, iter->dataTomb->tombBlockIdx); + iter->dataTomb->tombBlockIdx++; + goto _exit; + } + + if (iter->dataTomb->tombBlkArrayIdx >= TARRAY2_SIZE(iter->dataTomb->tombBlkArray)) { + iter->noMoreData = true; + goto _exit; + } + + for (; iter->dataTomb->tombBlkArrayIdx < TARRAY2_SIZE(iter->dataTomb->tombBlkArray); + iter->dataTomb->tombBlkArrayIdx++) { + const STombBlk *tombBlk = TARRAY2_GET_PTR(iter->dataTomb->tombBlkArray, iter->dataTomb->tombBlkArrayIdx); + + if (tbid && tbid->suid == tombBlk->minTbid.suid && tbid->uid == tombBlk->minTbid.uid && + tbid->suid == tombBlk->maxTbid.suid && tbid->uid == tombBlk->maxTbid.uid) { + continue; + } + + int32_t code = tsdbDataFileReadTombBlock(iter->dataTomb->reader, tombBlk, iter->dataTomb->tombBlock); + if (code) return code; + + iter->dataTomb->tombBlockIdx = 0; + iter->dataTomb->tombBlkArrayIdx++; + break; + } + } + +_exit: + return 0; +} + +// Positions iter->record on the next memtable delete record. Tables whose uid equals tbid->uid +// (when tbid is non-NULL) are skipped, as are records whose version is outside +// [range[0], range[1]] when filterByVersion is set. Sets noMoreData when the table tree is +// exhausted; always returns 0. +static int32_t tsdbMemTombIterNext(STsdbIter *iter, const TABLEID *tbid) { + while (!iter->noMoreData) { + for (; iter->memtTomb->delData;) { + if (tbid && tbid->uid == iter->memtTomb->tbData->uid) { + iter->memtTomb->delData = NULL; + break; + } + + if (iter->filterByVersion && + (iter->memtTomb->delData->version < iter->range[0] || iter->memtTomb->delData->version > iter->range[1])) { + // The loop has no increment clause, so move to the next delete record here; + // a bare continue would re-test the same record forever. + iter->memtTomb->delData = iter->memtTomb->delData->pNext; + continue; + } + + iter->record->suid = iter->memtTomb->tbData->suid; + iter->record->uid = iter->memtTomb->tbData->uid; + iter->record->version = iter->memtTomb->delData->version; + iter->record->skey = iter->memtTomb->delData->sKey; + iter->record->ekey = iter->memtTomb->delData->eKey; + + iter->memtTomb->delData = iter->memtTomb->delData->pNext; + goto _exit; + } + + for (;;) { + SRBTreeNode *node = tRBTreeIterNext(iter->memtTomb->rbtIter); + if (node == NULL) { + iter->noMoreData = true; + goto _exit; + } + + iter->memtTomb->tbData = TCONTAINER_OF(node, STbData, rbtn); + if (tbid && tbid->uid == iter->memtTomb->tbData->uid) { + continue; + 
} else { + iter->memtTomb->delData = iter->memtTomb->tbData->pHead; + break; + } + } + } + +_exit: + return 0; +} + +static int32_t tsdbSttIterOpen(STsdbIter *iter) { + int32_t code; + + code = tsdbSttFileReadSttBlk(iter->sttData->reader, &iter->sttData->sttBlkArray); + if (code) return code; + + if (TARRAY2_SIZE(iter->sttData->sttBlkArray) == 0) { + iter->noMoreData = true; + return 0; + } + + iter->sttData->sttBlkArrayIdx = 0; + tBlockDataCreate(iter->sttData->blockData); + iter->sttData->blockDataIdx = 0; + + return tsdbSttIterNext(iter, NULL); +} + +static int32_t tsdbDataIterOpen(STsdbIter *iter) { + int32_t code; + + // SBrinBlk + code = tsdbDataFileReadBrinBlk(iter->dataData->reader, &iter->dataData->brinBlkArray); + if (code) return code; + + if (TARRAY2_SIZE(iter->dataData->brinBlkArray) == 0) { + iter->noMoreData = true; + return 0; + } + + iter->dataData->brinBlkArrayIdx = 0; + + // SBrinBlock + tBrinBlockInit(iter->dataData->brinBlock); + iter->dataData->brinBlockIdx = 0; + + // SBlockData + tBlockDataCreate(iter->dataData->blockData); + iter->dataData->blockDataIdx = 0; + + return tsdbDataIterNext(iter, NULL); +} + +static int32_t tsdbMemTableIterOpen(STsdbIter *iter) { + if (iter->memtData->memt->nRow == 0) { + iter->noMoreData = true; + return 0; + } + + iter->memtData->iter[0] = tRBTreeIterCreate(iter->memtData->memt->tbDataTree, 1); + return tsdbMemTableIterNext(iter, NULL); +} + +static int32_t tsdbSttIterClose(STsdbIter *iter) { + tBlockDataDestroy(iter->sttData->blockData); + return 0; +} + +static int32_t tsdbDataTombIterOpen(STsdbIter *iter) { + int32_t code; + + code = tsdbDataFileReadTombBlk(iter->dataTomb->reader, &iter->dataTomb->tombBlkArray); + if (code) return code; + + if (TARRAY2_SIZE(iter->dataTomb->tombBlkArray) == 0) { + iter->noMoreData = true; + return 0; + } + iter->dataTomb->tombBlkArrayIdx = 0; + + tTombBlockInit(iter->dataTomb->tombBlock); + iter->dataTomb->tombBlockIdx = 0; + + return tsdbDataTombIterNext(iter, NULL); +} + 
+static int32_t tsdbMemTombIterOpen(STsdbIter *iter) { + int32_t code; + + if (iter->memtTomb->memt->nDel == 0) { + iter->noMoreData = true; + return 0; + } + + iter->memtTomb->rbtIter[0] = tRBTreeIterCreate(iter->memtTomb->memt->tbDataTree, 1); + return tsdbMemTombIterNext(iter, NULL); +} + +static int32_t tsdbDataIterClose(STsdbIter *iter) { + tBrinBlockDestroy(iter->dataData->brinBlock); + tBlockDataDestroy(iter->dataData->blockData); + return 0; +} + +static int32_t tsdbMemTableIterClose(STsdbIter *iter) { return 0; } + +static int32_t tsdbSttTombIterNext(STsdbIter *iter, const TABLEID *tbid) { + while (!iter->noMoreData) { + for (; iter->sttTomb->tombBlockIdx < TOMB_BLOCK_SIZE(iter->sttTomb->tombBlock); iter->sttTomb->tombBlockIdx++) { + iter->record->suid = TARRAY2_GET(iter->sttTomb->tombBlock->suid, iter->sttTomb->tombBlockIdx); + iter->record->uid = TARRAY2_GET(iter->sttTomb->tombBlock->uid, iter->sttTomb->tombBlockIdx); + iter->record->version = TARRAY2_GET(iter->sttTomb->tombBlock->version, iter->sttTomb->tombBlockIdx); + + if (iter->filterByVersion && (iter->record->version < iter->range[0] || iter->record->version > iter->range[1])) { + continue; + } + + if (tbid && iter->record->suid == tbid->suid && iter->record->uid == tbid->uid) { + continue; + } + + iter->record->skey = TARRAY2_GET(iter->sttTomb->tombBlock->skey, iter->sttTomb->tombBlockIdx); + iter->record->ekey = TARRAY2_GET(iter->sttTomb->tombBlock->ekey, iter->sttTomb->tombBlockIdx); + iter->sttTomb->tombBlockIdx++; + goto _exit; + } + + if (iter->sttTomb->tombBlkArrayIdx >= TARRAY2_SIZE(iter->sttTomb->tombBlkArray)) { + iter->noMoreData = true; + goto _exit; + } + + for (; iter->sttTomb->tombBlkArrayIdx < TARRAY2_SIZE(iter->sttTomb->tombBlkArray); + iter->sttTomb->tombBlkArrayIdx++) { + const STombBlk *tombBlk = TARRAY2_GET_PTR(iter->sttTomb->tombBlkArray, iter->sttTomb->tombBlkArrayIdx); + + if (iter->filterByVersion && (tombBlk->maxVer < iter->range[0] || tombBlk->minVer > iter->range[1])) { 
+ continue; + } + + if (tbid && tbid->suid == tombBlk->minTbid.suid && tbid->uid == tombBlk->minTbid.uid && + tbid->suid == tombBlk->maxTbid.suid && tbid->uid == tombBlk->maxTbid.uid) { + continue; + } + + int32_t code = tsdbSttFileReadTombBlock(iter->sttTomb->reader, tombBlk, iter->sttTomb->tombBlock); + if (code) return code; + + iter->sttTomb->tombBlockIdx = 0; + iter->sttTomb->tombBlkArrayIdx++; + break; + } + } + +_exit: + return 0; +} + +static int32_t tsdbSttTombIterOpen(STsdbIter *iter) { + int32_t code; + + code = tsdbSttFileReadTombBlk(iter->sttTomb->reader, &iter->sttTomb->tombBlkArray); + if (code) return code; + + if (TARRAY2_SIZE(iter->sttTomb->tombBlkArray) == 0) { + iter->noMoreData = true; + return 0; + } + + iter->sttTomb->tombBlkArrayIdx = 0; + tTombBlockInit(iter->sttTomb->tombBlock); + iter->sttTomb->tombBlockIdx = 0; + + return tsdbSttTombIterNext(iter, NULL); +} + +int32_t tsdbIterOpen(const STsdbIterConfig *config, STsdbIter **iter) { + int32_t code; + + iter[0] = taosMemoryCalloc(1, sizeof(*iter[0])); + if (iter[0] == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + iter[0]->type = config->type; + iter[0]->noMoreData = false; + iter[0]->filterByVersion = config->filterByVersion; + if (iter[0]->filterByVersion) { + iter[0]->range[0] = config->verRange[0]; + iter[0]->range[1] = config->verRange[1]; + } + + switch (config->type) { + case TSDB_ITER_TYPE_STT: + iter[0]->sttData->reader = config->sttReader; + code = tsdbSttIterOpen(iter[0]); + break; + case TSDB_ITER_TYPE_DATA: + iter[0]->dataData->reader = config->dataReader; + code = tsdbDataIterOpen(iter[0]); + break; + case TSDB_ITER_TYPE_MEMT: + iter[0]->memtData->memt = config->memt; + iter[0]->memtData->from[0] = config->from[0]; + code = tsdbMemTableIterOpen(iter[0]); + break; + case TSDB_ITER_TYPE_STT_TOMB: + iter[0]->sttTomb->reader = config->sttReader; + code = tsdbSttTombIterOpen(iter[0]); + break; + case TSDB_ITER_TYPE_DATA_TOMB: + iter[0]->dataTomb->reader = config->dataReader; + 
code = tsdbDataTombIterOpen(iter[0]); + break; + case TSDB_ITER_TYPE_MEMT_TOMB: + iter[0]->memtTomb->memt = config->memt; + code = tsdbMemTombIterOpen(iter[0]); + break; + default: + code = TSDB_CODE_INVALID_PARA; + ASSERTS(false, "Not implemented"); + } + + if (code) { + taosMemoryFree(iter[0]); + iter[0] = NULL; + } + return code; +} + +static int32_t tsdbSttTombIterClose(STsdbIter *iter) { + tTombBlockDestroy(iter->sttTomb->tombBlock); + return 0; +} + +static int32_t tsdbDataTombIterClose(STsdbIter *iter) { + tTombBlockDestroy(iter->dataTomb->tombBlock); + return 0; +} + +int32_t tsdbIterClose(STsdbIter **iter) { + switch (iter[0]->type) { + case TSDB_ITER_TYPE_STT: + tsdbSttIterClose(iter[0]); + break; + case TSDB_ITER_TYPE_DATA: + tsdbDataIterClose(iter[0]); + break; + case TSDB_ITER_TYPE_MEMT: + tsdbMemTableIterClose(iter[0]); + break; + case TSDB_ITER_TYPE_STT_TOMB: + tsdbSttTombIterClose(iter[0]); + break; + case TSDB_ITER_TYPE_DATA_TOMB: + tsdbDataTombIterClose(iter[0]); + break; + case TSDB_ITER_TYPE_MEMT_TOMB: + break; + default: + ASSERT(false); + } + taosMemoryFree(iter[0]); + iter[0] = NULL; + return 0; +} + +int32_t tsdbIterNext(STsdbIter *iter) { + switch (iter->type) { + case TSDB_ITER_TYPE_STT: + return tsdbSttIterNext(iter, NULL); + case TSDB_ITER_TYPE_DATA: + return tsdbDataIterNext(iter, NULL); + case TSDB_ITER_TYPE_MEMT: + return tsdbMemTableIterNext(iter, NULL); + case TSDB_ITER_TYPE_STT_TOMB: + return tsdbSttTombIterNext(iter, NULL); + case TSDB_ITER_TYPE_DATA_TOMB: + return tsdbDataTombIterNext(iter, NULL); + case TSDB_ITER_TYPE_MEMT_TOMB: + return tsdbMemTombIterNext(iter, NULL); + default: + ASSERT(false); + } + return 0; +} + +static int32_t tsdbIterSkipTableData(STsdbIter *iter, const TABLEID *tbid) { + switch (iter->type) { + case TSDB_ITER_TYPE_STT: + return tsdbSttIterNext(iter, tbid); + case TSDB_ITER_TYPE_DATA: + return tsdbDataIterNext(iter, tbid); + case TSDB_ITER_TYPE_MEMT: + return tsdbMemTableIterNext(iter, tbid); + case 
TSDB_ITER_TYPE_STT_TOMB: + return tsdbSttTombIterNext(iter, tbid); + case TSDB_ITER_TYPE_DATA_TOMB: + return tsdbDataTombIterNext(iter, tbid); + case TSDB_ITER_TYPE_MEMT_TOMB: + return tsdbMemTombIterNext(iter, tbid); + default: + ASSERT(false); + } + return 0; +} + +static int32_t tsdbIterCmprFn(const SRBTreeNode *n1, const SRBTreeNode *n2) { + STsdbIter *iter1 = TCONTAINER_OF(n1, STsdbIter, node); + STsdbIter *iter2 = TCONTAINER_OF(n2, STsdbIter, node); + return tRowInfoCmprFn(&iter1->row, &iter2->row); +} + +static int32_t tsdbTombIterCmprFn(const SRBTreeNode *n1, const SRBTreeNode *n2) { + STsdbIter *iter1 = TCONTAINER_OF(n1, STsdbIter, node); + STsdbIter *iter2 = TCONTAINER_OF(n2, STsdbIter, node); + + if (iter1->record->suid < iter2->record->suid) { + return -1; + } else if (iter1->record->suid > iter2->record->suid) { + return 1; + } + + if (iter1->record->uid < iter2->record->uid) { + return -1; + } else if (iter1->record->uid > iter2->record->uid) { + return 1; + } + + if (iter1->record->version < iter2->record->version) { + return -1; + } else if (iter1->record->version > iter2->record->version) { + return 1; + } + + return 0; +} + +// SIterMerger ================ +struct SIterMerger { + bool isTomb; + STsdbIter *iter; + SRBTree iterTree[1]; +}; + +int32_t tsdbIterMergerOpen(const TTsdbIterArray *iterArray, SIterMerger **merger, bool isTomb) { + STsdbIter *iter; + SRBTreeNode *node; + + merger[0] = taosMemoryCalloc(1, sizeof(*merger[0])); + if (merger[0] == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + merger[0]->isTomb = isTomb; + if (isTomb) { + tRBTreeCreate(merger[0]->iterTree, tsdbTombIterCmprFn); + } else { + tRBTreeCreate(merger[0]->iterTree, tsdbIterCmprFn); + } + TARRAY2_FOREACH(iterArray, iter) { + if (iter->noMoreData) continue; + node = tRBTreePut(merger[0]->iterTree, iter->node); + ASSERT(node); + } + + return tsdbIterMergerNext(merger[0]); +} + +int32_t tsdbIterMergerClose(SIterMerger **merger) { + if (merger[0]) { + 
taosMemoryFree(merger[0]); + merger[0] = NULL; + } + return 0; +} + +int32_t tsdbIterMergerNext(SIterMerger *merger) { + int32_t code; + int32_t c; + SRBTreeNode *node; + + if (merger->iter) { + code = tsdbIterNext(merger->iter); + if (code) return code; + + if (merger->iter->noMoreData) { + merger->iter = NULL; + } else if ((node = tRBTreeMin(merger->iterTree))) { + c = merger->iterTree->cmprFn(merger->iter->node, node); + ASSERT(c); + if (c > 0) { + node = tRBTreePut(merger->iterTree, merger->iter->node); + ASSERT(node); + merger->iter = NULL; + } + } + } + + if (merger->iter == NULL && (node = tRBTreeDropMin(merger->iterTree))) { + merger->iter = TCONTAINER_OF(node, STsdbIter, node); + } + + return 0; +} + +SRowInfo *tsdbIterMergerGetData(SIterMerger *merger) { + ASSERT(!merger->isTomb); + return merger->iter ? merger->iter->row : NULL; +} + +STombRecord *tsdbIterMergerGetTombRecord(SIterMerger *merger) { + ASSERT(merger->isTomb); + return merger->iter ? merger->iter->record : NULL; +} + +int32_t tsdbIterMergerSkipTableData(SIterMerger *merger, const TABLEID *tbid) { + int32_t code; + int32_t c; + SRBTreeNode *node; + + while (merger->iter && tbid->suid == merger->iter->row->suid && tbid->uid == merger->iter->row->uid) { + int32_t code = tsdbIterSkipTableData(merger->iter, tbid); + if (code) return code; + + if (merger->iter->noMoreData) { + merger->iter = NULL; + } else if ((node = tRBTreeMin(merger->iterTree))) { + c = merger->iterTree->cmprFn(merger->iter->node, node); + ASSERT(c); + if (c > 0) { + node = tRBTreePut(merger->iterTree, merger->iter->node); + ASSERT(node); + merger->iter = NULL; + } + } + + if (!merger->iter && (node = tRBTreeDropMin(merger->iterTree))) { + merger->iter = TCONTAINER_OF(node, STsdbIter, node); + } + } + + return 0; +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbIter.h b/source/dnode/vnode/src/tsdb/tsdbIter.h new file mode 100644 index 
// Parameters handed to tsdbIterOpen. `type` selects which member of the
// union is read.
typedef struct {
  EIterType type;
  union {
    SSttFileReader  *sttReader;   // TSDB_ITER_TYPE_STT || TSDB_ITER_TYPE_STT_TOMB
    SDataFileReader *dataReader;  // TSDB_ITER_TYPE_DATA || TSDB_ITER_TYPE_DATA_TOMB
    struct {
      SMemTable *memt;  // TSDB_ITER_TYPE_MEMT || TSDB_ITER_TYPE_MEMT_TOMB
      TSDBKEY    from[1];  // copied by tsdbIterOpen for TSDB_ITER_TYPE_MEMT only
    };
  };
  bool    filterByVersion;  // when true, verRange is copied into the iterator's range
  int64_t verRange[2];      // compared inclusively against record versions: [0] lower bound, [1] upper bound
} STsdbIterConfig;
tsdbIterMergerClose(SIterMerger **merger); +int32_t tsdbIterMergerNext(SIterMerger *merger); +int32_t tsdbIterMergerSkipTableData(SIterMerger *merger, const TABLEID *tbid); + +SRowInfo *tsdbIterMergerGetData(SIterMerger *merger); +STombRecord *tsdbIterMergerGetTombRecord(SIterMerger *merger); + +#ifdef __cplusplus +} +#endif + +#endif /*_TSDB_ITER_H_*/ \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbMemTable.c b/source/dnode/vnode/src/tsdb/tsdbMemTable.c index 6d223e00c54272a47377f3521a181d350ab2c4a2..ee3abf7559ecae536ab6dc522d541ee611477237 100644 --- a/source/dnode/vnode/src/tsdb/tsdbMemTable.c +++ b/source/dnode/vnode/src/tsdb/tsdbMemTable.c @@ -38,6 +38,16 @@ static int32_t tsdbInsertRowDataToTable(SMemTable *pMemTable, STbData *pTbData, static int32_t tsdbInsertColDataToTable(SMemTable *pMemTable, STbData *pTbData, int64_t version, SSubmitTbData *pSubmitTbData, int32_t *affectedRows); +static int32_t tTbDataCmprFn(const SRBTreeNode *n1, const SRBTreeNode *n2) { + STbData *tbData1 = TCONTAINER_OF(n1, STbData, rbtn); + STbData *tbData2 = TCONTAINER_OF(n2, STbData, rbtn); + if (tbData1->suid < tbData2->suid) return -1; + if (tbData1->suid > tbData2->suid) return 1; + if (tbData1->uid < tbData2->uid) return -1; + if (tbData1->uid > tbData2->uid) return 1; + return 0; +} + int32_t tsdbMemTableCreate(STsdb *pTsdb, SMemTable **ppMemTable) { int32_t code = 0; SMemTable *pMemTable = NULL; @@ -66,6 +76,7 @@ int32_t tsdbMemTableCreate(STsdb *pTsdb, SMemTable **ppMemTable) { goto _err; } vnodeBufPoolRef(pMemTable->pPool); + tRBTreeCreate(pMemTable->tbDataTree, tTbDataCmprFn); *ppMemTable = pMemTable; return code; @@ -406,6 +417,8 @@ static int32_t tsdbGetOrCreateTbData(SMemTable *pMemTable, tb_uid_t suid, tb_uid pMemTable->aBucket[idx] = pTbData; pMemTable->nTbData++; + tRBTreePut(pMemTable->tbDataTree, pTbData->rbtn); + taosWUnLockLatch(&pMemTable->latch); _exit: diff --git a/source/dnode/vnode/src/tsdb/tsdbMerge.c 
b/source/dnode/vnode/src/tsdb/tsdbMerge.c new file mode 100644 index 0000000000000000000000000000000000000000..ec0ea3c60ffa1c8ef0a5564dcdb2e2f59bd6935a --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbMerge.c @@ -0,0 +1,454 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tsdbMerge.h" + +typedef struct { + STsdb *tsdb; + TFileSetArray *fsetArr; + + int32_t sttTrigger; + int32_t maxRow; + int32_t minRow; + int32_t szPage; + int8_t cmprAlg; + int64_t compactVersion; + int64_t cid; + + // context + struct { + bool opened; + int64_t now; + STFileSet *fset; + bool toData; + int32_t level; + SSttLvl *lvl; + TABLEID tbid[1]; + } ctx[1]; + + TFileOpArray fopArr[1]; + + // reader + TSttFileReaderArray sttReaderArr[1]; + // iter + TTsdbIterArray dataIterArr[1]; + SIterMerger *dataIterMerger; + TTsdbIterArray tombIterArr[1]; + SIterMerger *tombIterMerger; + // writer + SFSetWriter *writer; +} SMerger; + +static int32_t tsdbMergerOpen(SMerger *merger) { + merger->ctx->now = taosGetTimestampSec(); + merger->maxRow = merger->tsdb->pVnode->config.tsdbCfg.maxRows; + merger->minRow = merger->tsdb->pVnode->config.tsdbCfg.minRows; + merger->szPage = merger->tsdb->pVnode->config.tsdbPageSize; + merger->cmprAlg = merger->tsdb->pVnode->config.tsdbCfg.compression; + merger->compactVersion = INT64_MAX; + merger->cid = tsdbFSAllocEid(merger->tsdb->pFS); + merger->ctx->opened = true; + return 0; +} + +static int32_t tsdbMergerClose(SMerger 
*merger) { + int32_t code = 0; + int32_t lino = 0; + SVnode *pVnode = merger->tsdb->pVnode; + + // edit file system + code = tsdbFSEditBegin(merger->tsdb->pFS, merger->fopArr, TSDB_FEDIT_MERGE); + TSDB_CHECK_CODE(code, lino, _exit); + + taosThreadRwlockWrlock(&merger->tsdb->rwLock); + code = tsdbFSEditCommit(merger->tsdb->pFS); + if (code) { + taosThreadRwlockUnlock(&merger->tsdb->rwLock); + TSDB_CHECK_CODE(code, lino, _exit); + } + taosThreadRwlockUnlock(&merger->tsdb->rwLock); + + ASSERT(merger->writer == NULL); + ASSERT(merger->dataIterMerger == NULL); + ASSERT(merger->tombIterMerger == NULL); + ASSERT(TARRAY2_SIZE(merger->dataIterArr) == 0); + ASSERT(TARRAY2_SIZE(merger->tombIterArr) == 0); + ASSERT(TARRAY2_SIZE(merger->sttReaderArr) == 0); + + // clear the merge + TARRAY2_DESTROY(merger->tombIterArr, NULL); + TARRAY2_DESTROY(merger->dataIterArr, NULL); + TARRAY2_DESTROY(merger->sttReaderArr, NULL); + TARRAY2_DESTROY(merger->fopArr, NULL); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(pVnode), lino, code); + } + return code; +} + +static int32_t tsdbMergeFileSetBeginOpenReader(SMerger *merger) { + int32_t code = 0; + int32_t lino = 0; + + merger->ctx->toData = true; + merger->ctx->level = 0; + + // TODO: optimize merge strategy + for (int32_t i = 0;; ++i) { + if (i >= TARRAY2_SIZE(merger->ctx->fset->lvlArr)) { + merger->ctx->lvl = NULL; + break; + } + + merger->ctx->lvl = TARRAY2_GET(merger->ctx->fset->lvlArr, i); + if (merger->ctx->lvl->level != merger->ctx->level || + TARRAY2_SIZE(merger->ctx->lvl->fobjArr) + 1 < merger->sttTrigger) { + merger->ctx->toData = false; + merger->ctx->lvl = NULL; + break; + } + + merger->ctx->level++; + + STFileObj *fobj; + int32_t numFile = 0; + TARRAY2_FOREACH(merger->ctx->lvl->fobjArr, fobj) { + if (numFile == merger->sttTrigger) { + break; + } + + STFileOp op = { + .optype = TSDB_FOP_REMOVE, + .fid = merger->ctx->fset->fid, + .of = fobj->f[0], + }; + code = TARRAY2_APPEND(merger->fopArr, op); + TSDB_CHECK_CODE(code, lino, 
_exit); + + SSttFileReader *reader; + SSttFileReaderConfig config = { + .tsdb = merger->tsdb, + .szPage = merger->szPage, + .file[0] = fobj->f[0], + }; + + code = tsdbSttFileReaderOpen(fobj->fname, &config, &reader); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND(merger->sttReaderArr, reader); + TSDB_CHECK_CODE(code, lino, _exit); + + numFile++; + } + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(merger->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbMergeFileSetBeginOpenIter(SMerger *merger) { + int32_t code = 0; + int32_t lino = 0; + int32_t vid = TD_VID(merger->tsdb->pVnode); + + SSttFileReader *sttReader; + TARRAY2_FOREACH(merger->sttReaderArr, sttReader) { + STsdbIter *iter; + STsdbIterConfig config = {0}; + + // data iter + config.type = TSDB_ITER_TYPE_STT; + config.sttReader = sttReader; + + code = tsdbIterOpen(&config, &iter); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND(merger->dataIterArr, iter); + TSDB_CHECK_CODE(code, lino, _exit); + + // tomb iter + config.type = TSDB_ITER_TYPE_STT_TOMB; + config.sttReader = sttReader; + + code = tsdbIterOpen(&config, &iter); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND(merger->tombIterArr, iter); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbIterMergerOpen(merger->dataIterArr, &merger->dataIterMerger, false); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbIterMergerOpen(merger->tombIterArr, &merger->tombIterMerger, true); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(vid, lino, code); + } + return code; +} + +static int32_t tsdbMergeFileSetBeginOpenWriter(SMerger *merger) { + int32_t code = 0; + int32_t lino = 0; + int32_t vid = TD_VID(merger->tsdb->pVnode); + + SDiskID did; + int32_t level = tsdbFidLevel(merger->ctx->fset->fid, &merger->tsdb->keepCfg, merger->ctx->now); + if (tfsAllocDisk(merger->tsdb->pVnode->pTfs, level, &did) < 0) { + code = TSDB_CODE_FS_NO_VALID_DISK; + 
TSDB_CHECK_CODE(code, lino, _exit); + } + tfsMkdirRecurAt(merger->tsdb->pVnode->pTfs, merger->tsdb->path, did); + SFSetWriterConfig config = { + .tsdb = merger->tsdb, + .toSttOnly = true, + .compactVersion = merger->compactVersion, + .minRow = merger->minRow, + .maxRow = merger->maxRow, + .szPage = merger->szPage, + .cmprAlg = merger->cmprAlg, + .fid = merger->ctx->fset->fid, + .cid = merger->cid, + .did = did, + .level = merger->ctx->level, + }; + + if (merger->ctx->toData) { + config.toSttOnly = false; + + for (int32_t ftype = 0; ftype < TSDB_FTYPE_MAX; ++ftype) { + if (merger->ctx->fset->farr[ftype]) { + config.files[ftype].exist = true; + config.files[ftype].file = merger->ctx->fset->farr[ftype]->f[0]; + } else { + config.files[ftype].exist = false; + } + } + } + + code = tsdbFSetWriterOpen(&config, &merger->writer); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(vid, lino, code); + } + return code; +} + +static int32_t tsdbMergeFileSetBegin(SMerger *merger) { + int32_t code = 0; + int32_t lino = 0; + + ASSERT(TARRAY2_SIZE(merger->sttReaderArr) == 0); + ASSERT(TARRAY2_SIZE(merger->dataIterArr) == 0); + ASSERT(merger->dataIterMerger == NULL); + ASSERT(merger->writer == NULL); + + merger->ctx->tbid->suid = 0; + merger->ctx->tbid->uid = 0; + + // open reader + code = tsdbMergeFileSetBeginOpenReader(merger); + TSDB_CHECK_CODE(code, lino, _exit); + + // open iterator + code = tsdbMergeFileSetBeginOpenIter(merger); + TSDB_CHECK_CODE(code, lino, _exit); + + // open writer + code = tsdbMergeFileSetBeginOpenWriter(merger); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(merger->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbMergeFileSetEndCloseWriter(SMerger *merger) { + return tsdbFSetWriterClose(&merger->writer, 0, merger->fopArr); +} + +static int32_t tsdbMergeFileSetEndCloseIter(SMerger *merger) { + tsdbIterMergerClose(&merger->tombIterMerger); + 
TARRAY2_CLEAR(merger->tombIterArr, tsdbIterClose); + tsdbIterMergerClose(&merger->dataIterMerger); + TARRAY2_CLEAR(merger->dataIterArr, tsdbIterClose); + return 0; +} + +static int32_t tsdbMergeFileSetEndCloseReader(SMerger *merger) { + TARRAY2_CLEAR(merger->sttReaderArr, tsdbSttFileReaderClose); + return 0; +} + +static int32_t tsdbMergeFileSetEnd(SMerger *merger) { + int32_t code = 0; + int32_t lino = 0; + + code = tsdbMergeFileSetEndCloseWriter(merger); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbMergeFileSetEndCloseIter(merger); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbMergeFileSetEndCloseReader(merger); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(merger->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbMergeFileSet(SMerger *merger, STFileSet *fset) { + int32_t code = 0; + int32_t lino = 0; + + merger->ctx->fset = fset; + code = tsdbMergeFileSetBegin(merger); + TSDB_CHECK_CODE(code, lino, _exit); + + // data + SMetaInfo info; + SRowInfo *row; + merger->ctx->tbid->suid = 0; + merger->ctx->tbid->uid = 0; + while ((row = tsdbIterMergerGetData(merger->dataIterMerger)) != NULL) { + if (row->uid != merger->ctx->tbid->uid) { + merger->ctx->tbid->uid = row->uid; + merger->ctx->tbid->suid = row->suid; + + if (metaGetInfo(merger->tsdb->pVnode->pMeta, row->uid, &info, NULL) != 0) { + code = tsdbIterMergerSkipTableData(merger->dataIterMerger, merger->ctx->tbid); + TSDB_CHECK_CODE(code, lino, _exit); + continue; + } + } + + code = tsdbFSetWriteRow(merger->writer, row); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbIterMergerNext(merger->dataIterMerger); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // tomb + merger->ctx->tbid->suid = 0; + merger->ctx->tbid->uid = 0; + for (STombRecord *record; (record = tsdbIterMergerGetTombRecord(merger->tombIterMerger)) != NULL;) { + if (record->uid != merger->ctx->tbid->uid) { + merger->ctx->tbid->uid = record->uid; + merger->ctx->tbid->suid = 
record->suid; + + if (metaGetInfo(merger->tsdb->pVnode->pMeta, record->uid, &info, NULL) != 0) { + code = tsdbIterMergerSkipTableData(merger->tombIterMerger, merger->ctx->tbid); + TSDB_CHECK_CODE(code, lino, _exit); + continue; + } + } + code = tsdbFSetWriteTombRecord(merger->writer, record); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbIterMergerNext(merger->tombIterMerger); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbMergeFileSetEnd(merger); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(merger->tsdb->pVnode), __func__, lino, tstrerror(code)); + } else { + tsdbDebug("vgId:%d %s done, fid:%d", TD_VID(merger->tsdb->pVnode), __func__, fset->fid); + } + return code; +} + +static int32_t tsdbDoMerge(SMerger *merger) { + int32_t code = 0; + int32_t lino = 0; + + STFileSet *fset; + TARRAY2_FOREACH(merger->fsetArr, fset) { + if (TARRAY2_SIZE(fset->lvlArr) == 0) continue; + + SSttLvl *lvl = TARRAY2_FIRST(fset->lvlArr); + + if (lvl->level != 0 || TARRAY2_SIZE(lvl->fobjArr) < merger->sttTrigger) continue; + + if (!merger->ctx->opened) { + code = tsdbMergerOpen(merger); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbMergeFileSet(merger, fset); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (merger->ctx->opened) { + code = tsdbMergerClose(merger); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(merger->tsdb->pVnode), lino, code); + } else { + tsdbDebug("vgId:%d %s done", TD_VID(merger->tsdb->pVnode), __func__); + } + return code; +} + +int32_t tsdbMerge(void *arg) { + int32_t code = 0; + int32_t lino = 0; + STsdb *tsdb = (STsdb *)arg; + + SMerger merger[1] = {{ + .tsdb = tsdb, + .sttTrigger = tsdb->pVnode->config.sttTrigger, + }}; + + ASSERT(merger->sttTrigger > 1); + + code = tsdbFSCreateCopySnapshot(tsdb->pFS, &merger->fsetArr); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDoMerge(merger); + TSDB_CHECK_CODE(code, lino, 
_exit); + + tsdbFSDestroyCopySnapshot(&merger->fsetArr); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } else if (merger->ctx->opened) { + tsdbDebug("vgId:%d %s done", TD_VID(tsdb->pVnode), __func__); + } + return code; +} diff --git a/source/dnode/vnode/src/tsdb/tsdbMerge.h b/source/dnode/vnode/src/tsdb/tsdbMerge.h new file mode 100644 index 0000000000000000000000000000000000000000..69d802fd2776eddba8d65090dfe5717ba4bb76bc --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbMerge.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "tsdbDataFileRW.h" +#include "tsdbFS2.h" +#include "tsdbFSetRW.h" +#include "tsdbIter.h" +#include "tsdbSttFileRW.h" +#include "tsdbUtil2.h" + +#ifndef _TD_TSDB_MERGE_H_ +#define _TD_TSDB_MERGE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Exposed Handle */ + +/* Exposed APIs */ + +/* Exposed Structs */ + +#ifdef __cplusplus +} +#endif + +#endif /*_TD_TSDB_MERGE_H_*/ \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbMergeTree.c b/source/dnode/vnode/src/tsdb/tsdbMergeTree.c index 79f4a17f65430c3dc0efc8177c7f131db60934e3..dc83cf326ba818b5c045a8368cdc19d38b436962 100644 --- a/source/dnode/vnode/src/tsdb/tsdbMergeTree.c +++ b/source/dnode/vnode/src/tsdb/tsdbMergeTree.c @@ -14,6 +14,11 @@ */ #include "tsdb.h" +#include "tsdbFSet2.h" +#include "tsdbReadUtil.h" +#include "tsdbSttFileRW.h" + +static void tLDataIterClose2(SLDataIter *pIter); // SLDataIter ================================================= SSttBlockLoadInfo *tCreateLastBlockLoadInfo(STSchema *pSchema, int16_t *colList, int32_t numOfCols, @@ -24,8 +29,6 @@ SSttBlockLoadInfo *tCreateLastBlockLoadInfo(STSchema *pSchema, int16_t *colList, return NULL; } - pLoadInfo->numOfStt = numOfSttTrigger; - for (int32_t i = 0; i < numOfSttTrigger; ++i) { pLoadInfo[i].blockIndex[0] = -1; pLoadInfo[i].blockIndex[1] = -1; @@ -50,8 +53,37 @@ SSttBlockLoadInfo *tCreateLastBlockLoadInfo(STSchema *pSchema, int16_t *colList, return pLoadInfo; } +SSttBlockLoadInfo *tCreateOneLastBlockLoadInfo(STSchema *pSchema, int16_t *colList, int32_t numOfCols) { + SSttBlockLoadInfo *pLoadInfo = taosMemoryCalloc(1, sizeof(SSttBlockLoadInfo)); + if (pLoadInfo == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + + pLoadInfo->blockIndex[0] = -1; + pLoadInfo->blockIndex[1] = -1; + pLoadInfo->currentLoadBlockIndex = 1; + + int32_t code = tBlockDataCreate(&pLoadInfo->blockData[0]); + if (code) { + terrno = code; + } + + code = tBlockDataCreate(&pLoadInfo->blockData[1]); + if (code) { + terrno 
= code; + } + + pLoadInfo->aSttBlk = taosArrayInit(4, sizeof(SSttBlk)); + pLoadInfo->pSchema = pSchema; + pLoadInfo->colIds = colList; + pLoadInfo->numOfCols = numOfCols; + + return pLoadInfo; +} + void resetLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo) { - for (int32_t i = 0; i < pLoadInfo->numOfStt; ++i) { + for (int32_t i = 0; i < 1; ++i) { pLoadInfo[i].currentLoadBlockIndex = 1; pLoadInfo[i].blockIndex[0] = -1; pLoadInfo[i].blockIndex[1] = -1; @@ -65,18 +97,24 @@ void resetLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo) { } void getLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo, int64_t *blocks, double *el) { - for (int32_t i = 0; i < pLoadInfo->numOfStt; ++i) { + for (int32_t i = 0; i < 1; ++i) { *el += pLoadInfo[i].elapsedTime; *blocks += pLoadInfo[i].loadBlocks; } } +static void freeTombBlock(void *param) { + STombBlock **pTombBlock = (STombBlock **)param; + tTombBlockDestroy(*pTombBlock); + taosMemoryFree(*pTombBlock); +} + void *destroyLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo) { if (pLoadInfo == NULL) { return NULL; } - for (int32_t i = 0; i < pLoadInfo->numOfStt; ++i) { + for (int32_t i = 0; i < 1; ++i) { pLoadInfo[i].currentLoadBlockIndex = 1; pLoadInfo[i].blockIndex[0] = -1; pLoadInfo[i].blockIndex[1] = -1; @@ -91,6 +129,33 @@ void *destroyLastBlockLoadInfo(SSttBlockLoadInfo *pLoadInfo) { return NULL; } +static void destroyLDataIter(SLDataIter *pIter) { + tLDataIterClose2(pIter); + destroyLastBlockLoadInfo(pIter->pBlockLoadInfo); + taosMemoryFree(pIter); +} + +void *destroySttBlockReader(SArray *pLDataIterArray, int64_t *blocks, double *el) { + if (pLDataIterArray == NULL) { + return NULL; + } + + int32_t numOfLevel = taosArrayGetSize(pLDataIterArray); + for (int32_t i = 0; i < numOfLevel; ++i) { + SArray *pList = taosArrayGetP(pLDataIterArray, i); + for (int32_t j = 0; j < taosArrayGetSize(pList); ++j) { + SLDataIter *pIter = taosArrayGetP(pList, j); + *el += pIter->pBlockLoadInfo->elapsedTime; + *blocks += pIter->pBlockLoadInfo->loadBlocks; + 
destroyLDataIter(pIter); + } + taosArrayDestroy(pList); + } + + taosArrayDestroy(pLDataIterArray); + return NULL; +} + static SBlockData *loadLastBlock(SLDataIter *pIter, const char *idStr) { int32_t code = 0; @@ -122,20 +187,8 @@ static SBlockData *loadLastBlock(SLDataIter *pIter, const char *idStr) { int64_t st = taosGetTimestampUs(); SBlockData *pBlock = &pInfo->blockData[pInfo->currentLoadBlockIndex]; - - TABLEID id = {0}; - if (pIter->pSttBlk->suid != 0) { - id.suid = pIter->pSttBlk->suid; - } else { - id.uid = pIter->uid; - } - - code = tBlockDataInit(pBlock, &id, pInfo->pSchema, pInfo->colIds, pInfo->numOfCols); - if (code != TSDB_CODE_SUCCESS) { - goto _exit; - } - - code = tsdbReadSttBlock(pIter->pReader, pIter->iStt, pIter->pSttBlk, pBlock); + code = tsdbSttFileReadBlockDataByColumn(pIter->pReader, pIter->pSttBlk, pBlock, pInfo->pSchema, &pInfo->colIds[1], + pInfo->numOfCols - 1); if (code != TSDB_CODE_SUCCESS) { goto _exit; } @@ -255,74 +308,160 @@ static int32_t binarySearchForStartRowIndex(uint64_t *uidList, int32_t num, uint int32_t tLDataIterOpen(struct SLDataIter *pIter, SDataFReader *pReader, int32_t iStt, int8_t backward, uint64_t suid, uint64_t uid, STimeWindow *pTimeWindow, SVersionRange *pRange, SSttBlockLoadInfo *pBlockLoadInfo, const char *idStr, bool strictTimeRange) { + return 0; +} + +static int32_t extractSttBlockInfo(SLDataIter *pIter, const TSttBlkArray *pArray, SSttBlockLoadInfo *pBlockLoadInfo, + uint64_t suid) { + if (TARRAY2_SIZE(pArray) <= 0) { + return TSDB_CODE_SUCCESS; + } + + SSttBlk *pStart = &pArray->data[0]; + SSttBlk *pEnd = &pArray->data[TARRAY2_SIZE(pArray) - 1]; + + // all identical + if (pStart->suid == pEnd->suid) { + if (pStart->suid != suid) { // no qualified stt block existed + taosArrayClear(pBlockLoadInfo->aSttBlk); + pIter->iSttBlk = -1; + return TSDB_CODE_SUCCESS; + } else { // all blocks are qualified + taosArrayClear(pBlockLoadInfo->aSttBlk); + taosArrayAddBatch(pBlockLoadInfo->aSttBlk, pArray->data, 
pArray->size); + } + } else { + SArray *pTmp = taosArrayInit(TARRAY2_SIZE(pArray), sizeof(SSttBlk)); + for (int32_t i = 0; i < TARRAY2_SIZE(pArray); ++i) { + SSttBlk *p = &pArray->data[i]; + if (p->suid < suid) { + continue; + } + + if (p->suid == suid) { + taosArrayPush(pTmp, p); + } else if (p->suid > suid) { + break; + } + } + + taosArrayDestroy(pBlockLoadInfo->aSttBlk); + pBlockLoadInfo->aSttBlk = pTmp; + } + + return TSDB_CODE_SUCCESS; +} + +static int32_t uidComparFn(const void *p1, const void *p2) { + const uint64_t *uid1 = p1; + const uint64_t *uid2 = p2; + return (*uid1) - (*uid2); +} + +static bool existsFromSttBlkStatis(const TStatisBlkArray *pStatisBlkArray, uint64_t suid, uint64_t uid, + SSttFileReader *pReader) { + if (TARRAY2_SIZE(pStatisBlkArray) <= 0) { + return true; + } + + int32_t i = 0; + for (i = 0; i < TARRAY2_SIZE(pStatisBlkArray); ++i) { + SStatisBlk *p = &pStatisBlkArray->data[i]; + if (p->minTbid.suid <= suid && p->maxTbid.suid >= suid) { + break; + } + } + + // for (; i < TARRAY2_SIZE(pStatisBlkArray); ++i) { + // SStatisBlk *p = &pStatisBlkArray->data[i]; + // if (p->minTbid.uid <= uid && p->maxTbid.uid >= uid) { + // break; + // } + // + // if (p->maxTbid.uid < uid) { + // break; + // } + // } + + if (i >= TARRAY2_SIZE(pStatisBlkArray)) { + return false; + } + + SStatisBlk *p = &pStatisBlkArray->data[i]; + STbStatisBlock block = {0}; + tsdbSttFileReadStatisBlock(pReader, p, &block); + + int32_t index = tarray2SearchIdx(block.uid, &uid, sizeof(int64_t), uidComparFn, TD_EQ); + tStatisBlockDestroy(&block); + + return (index != -1); +} + +int32_t tLDataIterOpen2(struct SLDataIter *pIter, SSttFileReader *pSttFileReader, int32_t iStt, int8_t backward, + uint64_t suid, uint64_t uid, STimeWindow *pTimeWindow, SVersionRange *pRange, + SSttBlockLoadInfo *pBlockLoadInfo, const char *idStr, bool strictTimeRange, + _load_tomb_fn loadTombFn, void *pReader1) { int32_t code = TSDB_CODE_SUCCESS; pIter->uid = uid; - pIter->pReader = pReader; pIter->iStt 
= iStt; pIter->backward = backward; pIter->verRange.minVer = pRange->minVer; pIter->verRange.maxVer = pRange->maxVer; pIter->timeWindow.skey = pTimeWindow->skey; pIter->timeWindow.ekey = pTimeWindow->ekey; - + pIter->pReader = pSttFileReader; pIter->pBlockLoadInfo = pBlockLoadInfo; + if (pIter->pReader == NULL) { + tsdbError("stt file reader is null, %s", idStr); + pIter->pSttBlk = NULL; + pIter->iSttBlk = -1; + return TSDB_CODE_SUCCESS; + } + if (!pBlockLoadInfo->sttBlockLoaded) { int64_t st = taosGetTimestampUs(); + + const TSttBlkArray *pSttBlkArray = NULL; pBlockLoadInfo->sttBlockLoaded = true; - code = tsdbReadSttBlk(pReader, iStt, pBlockLoadInfo->aSttBlk); - if (code) { + // load the stt block info for each stt-block + code = tsdbSttFileReadSttBlk(pIter->pReader, &pSttBlkArray); + if (code != TSDB_CODE_SUCCESS) { + tsdbError("load stt blk failed, code:%s, %s", tstrerror(code), idStr); return code; } - // only apply to the child tables, ordinary tables will not incur this filter procedure. 
- size_t size = taosArrayGetSize(pBlockLoadInfo->aSttBlk); - - if (size >= 1) { - SSttBlk *pStart = taosArrayGet(pBlockLoadInfo->aSttBlk, 0); - SSttBlk *pEnd = taosArrayGet(pBlockLoadInfo->aSttBlk, size - 1); - - // all identical - if (pStart->suid == pEnd->suid) { - if (pStart->suid != suid) { - // no qualified stt block existed - taosArrayClear(pBlockLoadInfo->aSttBlk); - - pIter->iSttBlk = -1; - double el = (taosGetTimestampUs() - st) / 1000.0; - tsdbDebug("load the last file info completed, elapsed time:%.2fms, %s", el, idStr); - return code; - } - } else { - SArray *pTmp = taosArrayInit(size, sizeof(SSttBlk)); - for (int32_t i = 0; i < size; ++i) { - SSttBlk *p = taosArrayGet(pBlockLoadInfo->aSttBlk, i); - uint64_t s = p->suid; - if (s < suid) { - continue; - } - - if (s == suid) { - taosArrayPush(pTmp, p); - } else if (s > suid) { - break; - } - } + code = extractSttBlockInfo(pIter, pSttBlkArray, pBlockLoadInfo, suid); + if (code != TSDB_CODE_SUCCESS) { + tsdbError("load stt block info failed, code:%s, %s", tstrerror(code), idStr); + return code; + } - taosArrayDestroy(pBlockLoadInfo->aSttBlk); - pBlockLoadInfo->aSttBlk = pTmp; - } + // load stt blocks statis for all stt-blocks, to decide if the data of queried table exists in current stt file + code = tsdbSttFileReadStatisBlk(pIter->pReader, (const TStatisBlkArray **)&pBlockLoadInfo->pSttStatisBlkArray); + if (code != TSDB_CODE_SUCCESS) { + tsdbError("failed to load stt block statistics, code:%s, %s", tstrerror(code), idStr); + return code; } + code = loadTombFn(pReader1, pIter->pReader, pIter->pBlockLoadInfo); + double el = (taosGetTimestampUs() - st) / 1000.0; - tsdbDebug("load the last file info completed, elapsed time:%.2fms, %s", el, idStr); + tsdbDebug("load the stt file info completed, elapsed time:%.2fms, %s", el, idStr); } - size_t size = taosArrayGetSize(pBlockLoadInfo->aSttBlk); + // bool exists = existsFromSttBlkStatis(pBlockLoadInfo->pSttStatisBlkArray, suid, uid, pIter->pReader); + // if 
(!exists) { + // pIter->iSttBlk = -1; + // pIter->pSttBlk = NULL; + // return TSDB_CODE_SUCCESS; + // } - // find the start block + // find the start block, actually we could load the position to avoid repeatly searching for the start position when + // the skey is updated. + size_t size = taosArrayGetSize(pBlockLoadInfo->aSttBlk); pIter->iSttBlk = binarySearchForStartBlock(pBlockLoadInfo->aSttBlk->pData, size, uid, backward); if (pIter->iSttBlk != -1) { pIter->pSttBlk = taosArrayGet(pBlockLoadInfo->aSttBlk, pIter->iSttBlk); @@ -343,7 +482,10 @@ int32_t tLDataIterOpen(struct SLDataIter *pIter, SDataFReader *pReader, int32_t return code; } -void tLDataIterClose(SLDataIter *pIter) { /*taosMemoryFree(pIter); */} +void tLDataIterClose2(SLDataIter *pIter) { + tsdbSttFileReaderClose(&pIter->pReader); + pIter->pReader = NULL; +} void tLDataIterNextBlock(SLDataIter *pIter, const char *idStr) { int32_t step = pIter->backward ? -1 : 1; @@ -395,25 +537,23 @@ void tLDataIterNextBlock(SLDataIter *pIter, const char *idStr) { if (index != -1) { pIter->iSttBlk = index; pIter->pSttBlk = (SSttBlk *)taosArrayGet(pIter->pBlockLoadInfo->aSttBlk, pIter->iSttBlk); - tsdbDebug("try next last file block:%d from %d, trigger by uid:%" PRIu64 ", file index:%d, %s", pIter->iSttBlk, - oldIndex, pIter->uid, pIter->iStt, idStr); + tsdbDebug("try next last file block:%d from stt fileIdx:%d, trigger by uid:%" PRIu64 ", file index:%d, %s", + pIter->iSttBlk, oldIndex, pIter->uid, pIter->iStt, idStr); } else { tsdbDebug("no more last block qualified, uid:%" PRIu64 ", file index:%d, %s", pIter->uid, oldIndex, idStr); } } static void findNextValidRow(SLDataIter *pIter, const char *idStr) { - int32_t step = pIter->backward ? -1 : 1; - bool hasVal = false; + int32_t step = pIter->backward ? 
-1 : 1; int32_t i = pIter->iRow; - SBlockData *pBlockData = loadLastBlock(pIter, idStr); + SBlockData *pData = loadLastBlock(pIter, idStr); // mostly we only need to find the start position for a given table - if ((((i == 0) && (!pIter->backward)) || (i == pBlockData->nRow - 1 && pIter->backward)) && - pBlockData->aUid != NULL) { - i = binarySearchForStartRowIndex((uint64_t *)pBlockData->aUid, pBlockData->nRow, pIter->uid, pIter->backward); + if ((((i == 0) && (!pIter->backward)) || (i == pData->nRow - 1 && pIter->backward)) && pData->aUid != NULL) { + i = binarySearchForStartRowIndex((uint64_t *)pData->aUid, pData->nRow, pIter->uid, pIter->backward); if (i == -1) { tsdbDebug("failed to find the data in pBlockData, uid:%" PRIu64 " , %s", pIter->uid, idStr); pIter->iRow = -1; @@ -421,20 +561,20 @@ static void findNextValidRow(SLDataIter *pIter, const char *idStr) { } } - for (; i < pBlockData->nRow && i >= 0; i += step) { - if (pBlockData->aUid != NULL) { + for (; i < pData->nRow && i >= 0; i += step) { + if (pData->aUid != NULL) { if (!pIter->backward) { - if (pBlockData->aUid[i] > pIter->uid) { + if (pData->aUid[i] > pIter->uid) { break; } } else { - if (pBlockData->aUid[i] < pIter->uid) { + if (pData->aUid[i] < pIter->uid) { break; } } } - int64_t ts = pBlockData->aTSKEY[i]; + int64_t ts = pData->aTSKEY[i]; if (!pIter->backward) { // asc if (ts > pIter->timeWindow.ekey) { // no more data break; @@ -449,7 +589,7 @@ static void findNextValidRow(SLDataIter *pIter, const char *idStr) { } } - int64_t ver = pBlockData->aVersion[i]; + int64_t ver = pData->aVersion[i]; if (ver < pIter->verRange.minVer) { continue; } @@ -485,7 +625,6 @@ bool tLDataIterNextRow(SLDataIter *pIter, const char *idStr) { while (1) { bool skipBlock = false; - findNextValidRow(pIter, idStr); if (pIter->pBlockLoadInfo->checkRemainingRow) { @@ -570,7 +709,7 @@ static FORCE_INLINE int32_t tLDataIterDescCmprFn(const SRBTreeNode *p1, const SR int32_t tMergeTreeOpen(SMergeTree *pMTree, int8_t backward, 
SDataFReader *pFReader, uint64_t suid, uint64_t uid, STimeWindow *pTimeWindow, SVersionRange *pVerRange, SSttBlockLoadInfo *pBlockLoadInfo, - bool destroyLoadInfo, const char *idStr, bool strictTimeRange, SLDataIter* pLDataIter) { + bool destroyLoadInfo, const char *idStr, bool strictTimeRange, SLDataIter *pLDataIter) { int32_t code = TSDB_CODE_SUCCESS; pMTree->backward = backward; @@ -612,6 +751,101 @@ _end: return code; } +int32_t tMergeTreeOpen2(SMergeTree *pMTree, SMergeTreeConf *pConf) { + int32_t code = TSDB_CODE_SUCCESS; + + pMTree->pIter = NULL; + pMTree->backward = pConf->backward; + pMTree->idStr = pConf->idstr; + + if (!pMTree->backward) { // asc + tRBTreeCreate(&pMTree->rbt, tLDataIterCmprFn); + } else { // desc + tRBTreeCreate(&pMTree->rbt, tLDataIterDescCmprFn); + } + + pMTree->ignoreEarlierTs = false; + + int32_t size = ((STFileSet *)pConf->pCurrentFileset)->lvlArr->size; + if (size == 0) { + goto _end; + } + + // add the list/iter placeholder + while (taosArrayGetSize(pConf->pSttFileBlockIterArray) < size) { + SArray *pList = taosArrayInit(4, POINTER_BYTES); + taosArrayPush(pConf->pSttFileBlockIterArray, &pList); + } + + for (int32_t j = 0; j < size; ++j) { + SSttLvl *pSttLevel = ((STFileSet *)pConf->pCurrentFileset)->lvlArr->data[j]; + ASSERT(pSttLevel->level == j); + + SArray *pList = taosArrayGetP(pConf->pSttFileBlockIterArray, j); + int32_t numOfIter = taosArrayGetSize(pList); + + if (numOfIter < TARRAY2_SIZE(pSttLevel->fobjArr)) { + int32_t inc = TARRAY2_SIZE(pSttLevel->fobjArr) - numOfIter; + for (int32_t k = 0; k < inc; ++k) { + SLDataIter *pIter = taosMemoryCalloc(1, sizeof(SLDataIter)); + taosArrayPush(pList, &pIter); + } + } else if (numOfIter > TARRAY2_SIZE(pSttLevel->fobjArr)){ + int32_t inc = numOfIter - TARRAY2_SIZE(pSttLevel->fobjArr); + for (int i = 0; i < inc; ++i) { + SLDataIter *pIter = taosArrayPop(pList); + destroyLDataIter(pIter); + } + } + + for (int32_t i = 0; i < TARRAY2_SIZE(pSttLevel->fobjArr); ++i) { // open all last file 
+ SLDataIter *pIter = taosArrayGetP(pList, i); + + SSttFileReader *pSttFileReader = pIter->pReader; + SSttBlockLoadInfo *pLoadInfo = pIter->pBlockLoadInfo; + + // open stt file reader if not + if (pSttFileReader == NULL) { + SSttFileReaderConfig conf = {.tsdb = pConf->pTsdb, .szPage = pConf->pTsdb->pVnode->config.tsdbPageSize}; + conf.file[0] = *pSttLevel->fobjArr->data[i]->f; + + code = tsdbSttFileReaderOpen(pSttLevel->fobjArr->data[i]->fname, &conf, &pSttFileReader); + if (code != TSDB_CODE_SUCCESS) { + tsdbError("open stt file reader error. file name %s, code %s, %s", pSttLevel->fobjArr->data[i]->fname, + tstrerror(code), pMTree->idStr); + } + } + + if (pLoadInfo == NULL) { + pLoadInfo = tCreateOneLastBlockLoadInfo(pConf->pSchema, pConf->pCols, pConf->numOfCols); + } + + memset(pIter, 0, sizeof(SLDataIter)); + code = tLDataIterOpen2(pIter, pSttFileReader, i, pMTree->backward, pConf->suid, pConf->uid, &pConf->timewindow, + &pConf->verRange, pLoadInfo, pMTree->idStr, pConf->strictTimeRange, pConf->loadTombFn, + pConf->pReader); + if (code != TSDB_CODE_SUCCESS) { + goto _end; + } + + bool hasVal = tLDataIterNextRow(pIter, pMTree->idStr); + if (hasVal) { + tMergeTreeAddIter(pMTree, pIter); + } else { + if (!pMTree->ignoreEarlierTs) { + pMTree->ignoreEarlierTs = pIter->ignoreEarlierTs; + } + } + } + } + + return code; + +_end: + tMergeTreeClose(pMTree); + return code; +} + void tMergeTreeAddIter(SMergeTree *pMTree, SLDataIter *pIter) { tRBTreePut(&pMTree->rbt, (SRBTreeNode *)pIter); } bool tMergeTreeIgnoreEarlierTs(SMergeTree *pMTree) { return pMTree->ignoreEarlierTs; } diff --git a/source/dnode/vnode/src/tsdb/tsdbOpen.c b/source/dnode/vnode/src/tsdb/tsdbOpen.c index 8901f644598ec4ca5343f4a35a7b063bf39096fd..c684ad51848d1a6af49ff2881f05b96eca6e51dc 100644 --- a/source/dnode/vnode/src/tsdb/tsdbOpen.c +++ b/source/dnode/vnode/src/tsdb/tsdbOpen.c @@ -14,6 +14,7 @@ */ #include "tsdb.h" +#include "tsdbFS2.h" int32_t tsdbSetKeepCfg(STsdb *pTsdb, STsdbCfg *pCfg) { 
STsdbKeepCfg *pKeepCfg = &pTsdb->keepCfg; @@ -66,7 +67,7 @@ int tsdbOpen(SVnode *pVnode, STsdb **ppTsdb, const char *dir, STsdbKeepCfg *pKee } // open tsdb - if (tsdbFSOpen(pTsdb, rollback) < 0) { + if (tsdbOpenFS(pTsdb, &pTsdb->pFS, rollback) < 0) { goto _err; } @@ -94,7 +95,7 @@ int tsdbClose(STsdb **pTsdb) { taosThreadRwlockDestroy(&(*pTsdb)->rwLock); - tsdbFSClose(*pTsdb); + tsdbCloseFS(&(*pTsdb)->pFS); tsdbCloseCache(*pTsdb); taosMemoryFreeClear(*pTsdb); } diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index cfeb1288d452f42c3f05cc0bde53134a45b262a5..2aa21bd86f31e8167d0f5f0fd9406dad9a50d513 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -17,7 +17,7 @@ #include "tsdb.h" #include "tsimplehash.h" -#define ASCENDING_TRAVERSE(o) (o == TSDB_ORDER_ASC) +#define ASCENDING_TRAVERSE(o) (o == TSDB_ORDER_ASC) #define getCurrentKeyInLastBlock(_r) ((_r)->currentKey) typedef enum { @@ -30,12 +30,12 @@ typedef enum { EXTERNAL_ROWS_MAIN = 0x2, EXTERNAL_ROWS_NEXT = 0x3, } EContentData; - +/* typedef enum { READ_MODE_COUNT_ONLY = 0x1, READ_MODE_ALL, } EReadMode; - +*/ typedef struct { STbDataIter* iter; int32_t index; @@ -166,7 +166,7 @@ typedef struct SReaderStatus { SDataBlockIter blockIter; SLDataIter* pLDataIter; SRowMerger merger; - SColumnInfoData* pPrimaryTsCol; // primary time stamp output col info data + SColumnInfoData* pPrimaryTsCol; // primary time stamp output col info data } SReaderStatus; typedef struct SBlockInfoBuf { @@ -292,7 +292,7 @@ static int32_t updateBlockSMAInfo(STSchema* pSchema, SBlockLoadSuppInfo* pSupInf if (j < pSupInfo->numOfCols && PRIMARYKEY_TIMESTAMP_COL_ID == pSupInfo->colId[j]) { j += 1; } - + while (i < pSchema->numOfCols && j < pSupInfo->numOfCols) { STColumn* pTCol = &pSchema->columns[i]; if (pTCol->colId == pSupInfo->colId[j]) { @@ -410,7 +410,7 @@ static int32_t uidComparFunc(const void* p1, const void* p2) { // NOTE: speedup the whole 
processing by preparing the buffer for STableBlockScanInfo in batch model static SSHashObj* createDataBlockScanInfo(STsdbReader* pTsdbReader, SBlockInfoBuf* pBuf, const STableKeyInfo* idList, - STableUidList* pUidList, int32_t numOfTables) { + STableUidList* pUidList, int32_t numOfTables) { // allocate buffer in order to load data blocks from file // todo use simple hash instead, optimize the memory consumption SSHashObj* pTableMap = tSimpleHashInit(numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT)); @@ -461,7 +461,7 @@ static SSHashObj* createDataBlockScanInfo(STsdbReader* pTsdbReader, SBlockInfoBu } static void resetAllDataBlockScanInfo(SSHashObj* pTableMap, int64_t ts, int32_t step) { - void *p = NULL; + void* p = NULL; int32_t iter = 0; while ((p = tSimpleHashIterate(pTableMap, p, &iter)) != NULL) { @@ -505,7 +505,7 @@ static void clearBlockScanInfo(STableBlockScanInfo* p) { } static void destroyAllBlockScanInfo(SSHashObj* pTableMap) { - void* p = NULL; + void* p = NULL; int32_t iter = 0; while ((p = tSimpleHashIterate(pTableMap, p, &iter)) != NULL) { @@ -743,7 +743,8 @@ void tsdbReleaseDataBlock(STsdbReader* pReader) { } } -static int32_t initResBlockInfo(SResultBlockInfo* pResBlockInfo, int64_t capacity, SSDataBlock* pResBlock, SQueryTableDataCond* pCond) { +static int32_t initResBlockInfo(SResultBlockInfo* pResBlockInfo, int64_t capacity, SSDataBlock* pResBlock, + SQueryTableDataCond* pCond) { pResBlockInfo->capacity = capacity; pResBlockInfo->pResBlock = pResBlock; terrno = 0; @@ -921,9 +922,9 @@ static void cleanupTableScanInfo(SReaderStatus* pStatus) { return; } - SSHashObj* pTableMap = pStatus->pTableMap; + SSHashObj* pTableMap = pStatus->pTableMap; STableBlockScanInfo** px = NULL; - int32_t iter = 0; + int32_t iter = 0; while (1) { px = tSimpleHashIterate(pTableMap, px, &iter); @@ -937,9 +938,10 @@ static void cleanupTableScanInfo(SReaderStatus* pStatus) { pStatus->mapDataCleaned = true; } -static int32_t doLoadFileBlock(STsdbReader* 
pReader, SArray* pIndexList, SBlockNumber* pBlockNum, SArray* pTableScanInfoList) { - size_t sizeInDisk = 0; - size_t numOfTables = taosArrayGetSize(pIndexList); +static int32_t doLoadFileBlock(STsdbReader* pReader, SArray* pIndexList, SBlockNumber* pBlockNum, + SArray* pTableScanInfoList) { + size_t sizeInDisk = 0; + size_t numOfTables = taosArrayGetSize(pIndexList); int64_t st = taosGetTimestampUs(); cleanupTableScanInfo(&pReader->status); @@ -1125,18 +1127,18 @@ static int32_t getEndPosInDataBlock(STsdbReader* pReader, SBlockData* pBlockData endPos = doBinarySearchKey(pBlockData->aTSKEY, pBlock->nRow, pos, key, pReader->order); } - if ((pReader->verRange.maxVer >= pBlock->minVer && pReader->verRange.maxVer < pBlock->maxVer)|| + if ((pReader->verRange.maxVer >= pBlock->minVer && pReader->verRange.maxVer < pBlock->maxVer) || (pReader->verRange.minVer <= pBlock->maxVer && pReader->verRange.minVer > pBlock->minVer)) { int32_t i = endPos; if (asc) { - for(; i >= 0; --i) { + for (; i >= 0; --i) { if (pBlockData->aVersion[i] <= pReader->verRange.maxVer) { break; } } } else { - for(; i < pBlock->nRow; ++i) { + for (; i < pBlock->nRow; ++i) { if (pBlockData->aVersion[i] >= pReader->verRange.minVer) { break; } @@ -1309,17 +1311,17 @@ static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader) { ASSERT(pReader->verRange.minVer <= pBlock->maxVer && pReader->verRange.maxVer >= pBlock->minVer); // find the appropriate start position that satisfies the version requirement. 
- if ((pReader->verRange.maxVer >= pBlock->minVer && pReader->verRange.maxVer < pBlock->maxVer)|| + if ((pReader->verRange.maxVer >= pBlock->minVer && pReader->verRange.maxVer < pBlock->maxVer) || (pReader->verRange.minVer <= pBlock->maxVer && pReader->verRange.minVer > pBlock->minVer)) { int32_t i = pDumpInfo->rowIndex; if (asc) { - for(; i < pBlock->nRow; ++i) { + for (; i < pBlock->nRow; ++i) { if (pBlockData->aVersion[i] >= pReader->verRange.minVer) { break; } } } else { - for(; i >= 0; --i) { + for (; i >= 0; --i) { if (pBlockData->aVersion[i] <= pReader->verRange.maxVer) { break; } @@ -1562,7 +1564,8 @@ static int32_t doSetCurrentBlock(SDataBlockIter* pBlockIter, const char* idStr) return TSDB_CODE_SUCCESS; } -static int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIter, int32_t numOfBlocks, SArray* pTableList) { +static int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIter, int32_t numOfBlocks, + SArray* pTableList) { bool asc = ASCENDING_TRAVERSE(pReader->order); SBlockOrderSupporter sup = {0}; @@ -1967,13 +1970,14 @@ static bool nextRowFromLastBlocks(SLastBlockReader* pLastBlockReader, STableBloc } TSDBROW* pRow = tMergeTreeGetRow(&pLastBlockReader->mergeTree); - int64_t key = pRow->pBlockData->aTSKEY[pRow->iRow]; - int64_t ver = pRow->pBlockData->aVersion[pRow->iRow]; + int64_t key = pRow->pBlockData->aTSKEY[pRow->iRow]; + int64_t ver = pRow->pBlockData->aVersion[pRow->iRow]; pLastBlockReader->currentKey = key; pScanInfo->lastKeyInStt = key; - if (!hasBeenDropped(pScanInfo->delSkyline, &pScanInfo->lastBlockDelIndex, key, ver, pLastBlockReader->order, pVerRange)) { + if (!hasBeenDropped(pScanInfo->delSkyline, &pScanInfo->lastBlockDelIndex, key, ver, pLastBlockReader->order, + pVerRange)) { return true; } } @@ -2030,7 +2034,7 @@ static FORCE_INLINE STSchema* doGetSchemaForTSRow(int32_t sversion, STsdbReader* } STSchema* ptr = NULL; - int32_t code = metaGetTbTSchemaEx(pReader->pTsdb->pVnode->pMeta, pReader->suid, 
uid, sversion, &ptr); + int32_t code = metaGetTbTSchemaEx(pReader->pTsdb->pVnode->pMeta, pReader->suid, uid, sversion, &ptr); if (code != TSDB_CODE_SUCCESS) { terrno = code; return NULL; @@ -2153,7 +2157,7 @@ static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo* return terrno; } - int32_t code = tsdbRowMergerAdd(pMerger, pRow, pSchema); + int32_t code = tsdbRowMergerAdd(pMerger, pRow, pSchema); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -2208,7 +2212,7 @@ static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo* static int32_t doMergeFileBlockAndLastBlock(SLastBlockReader* pLastBlockReader, STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, SBlockData* pBlockData, bool mergeBlockData) { - SRowMerger* pMerger = &pReader->status.merger; + SRowMerger* pMerger = &pReader->status.merger; SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; int64_t tsLastBlock = getCurrentKeyInLastBlock(pLastBlockReader); @@ -2218,9 +2222,10 @@ static int32_t doMergeFileBlockAndLastBlock(SLastBlockReader* pLastBlockReader, TSDBROW* pRow = tMergeTreeGetRow(&pLastBlockReader->mergeTree); // create local variable to hold the row value - TSDBROW fRow = {.iRow = pRow->iRow, .type = TSDBROW_COL_FMT, .pBlockData = pRow->pBlockData}; + TSDBROW fRow = {.iRow = pRow->iRow, .type = TSDBROW_COL_FMT, .pBlockData = pRow->pBlockData}; - tsdbTrace("fRow ptr:%p, %d, uid:%" PRIu64 ", %s", pRow->pBlockData, pRow->iRow, pLastBlockReader->uid, pReader->idStr); + tsdbTrace("fRow ptr:%p, %d, uid:%" PRIu64 ", %s", pRow->pBlockData, pRow->iRow, pLastBlockReader->uid, + pReader->idStr); // only last block exists if ((!mergeBlockData) || (tsLastBlock != pBlockData->aTSKEY[pDumpInfo->rowIndex])) { @@ -2240,7 +2245,8 @@ static int32_t doMergeFileBlockAndLastBlock(SLastBlockReader* pLastBlockReader, TSDBROW* pRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); tsdbRowMergerAdd(pMerger, pRow1, NULL); - 
doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLastBlock, pMerger, &pReader->verRange, pReader->idStr); + doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLastBlock, pMerger, &pReader->verRange, + pReader->idStr); code = tsdbRowMergerGetRow(pMerger, &pTSRow); if (code != TSDB_CODE_SUCCESS) { @@ -2290,7 +2296,7 @@ static int32_t doMergeFileBlockAndLastBlock(SLastBlockReader* pLastBlockReader, static int32_t mergeFileBlockAndLastBlock(STsdbReader* pReader, SLastBlockReader* pLastBlockReader, int64_t key, STableBlockScanInfo* pBlockScanInfo, SBlockData* pBlockData) { SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - SRowMerger* pMerger = &pReader->status.merger; + SRowMerger* pMerger = &pReader->status.merger; // merge is not initialized yet, due to the fact that the pReader->pSchema is not initialized if (pMerger->pArray == NULL) { @@ -2316,7 +2322,7 @@ static int32_t mergeFileBlockAndLastBlock(STsdbReader* pReader, SLastBlockReader if (key < ts) { // imem, mem are all empty, file blocks (data blocks and last block) exist return mergeRowsInFileBlocks(pBlockData, pBlockScanInfo, key, pReader); } else if (key == ts) { - SRow* pTSRow = NULL; + SRow* pTSRow = NULL; int32_t code = tsdbRowMergerAdd(pMerger, &fRow, pReader->pSchema); if (code != TSDB_CODE_SUCCESS) { return code; @@ -2723,7 +2729,7 @@ int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pBloc } else { TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); - SRow* pTSRow = NULL; + SRow* pTSRow = NULL; code = tsdbRowMergerAdd(pMerger, &fRow, pReader->pSchema); if (code != TSDB_CODE_SUCCESS) { return code; @@ -2837,11 +2843,11 @@ static int32_t buildComposedDataBlock(STsdbReader* pReader) { SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter); SLastBlockReader* pLastBlockReader = pReader->status.fileIter.pLastBlockReader; - bool asc = ASCENDING_TRAVERSE(pReader->order); - int64_t st = taosGetTimestampUs(); - 
int32_t step = asc ? 1 : -1; - double el = 0; - SDataBlk* pBlock = getCurrentBlock(&pReader->status.blockIter); + bool asc = ASCENDING_TRAVERSE(pReader->order); + int64_t st = taosGetTimestampUs(); + int32_t step = asc ? 1 : -1; + double el = 0; + SDataBlk* pBlock = getCurrentBlock(&pReader->status.blockIter); SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; STableBlockScanInfo* pBlockScanInfo = NULL; @@ -2874,7 +2880,8 @@ static int32_t buildComposedDataBlock(STsdbReader* pReader) { } } else { // file blocks not exist pBlockScanInfo = *pReader->status.pTableIter; - if (pReader->pIgnoreTables && taosHashGet(*pReader->pIgnoreTables, &pBlockScanInfo->uid, sizeof(pBlockScanInfo->uid))) { + if (pReader->pIgnoreTables && + taosHashGet(*pReader->pIgnoreTables, &pBlockScanInfo->uid, sizeof(pBlockScanInfo->uid))) { setBlockAllDumped(pDumpInfo, pBlock->maxKey.ts, pReader->order); return code; } @@ -3172,6 +3179,16 @@ static int32_t doLoadLastBlockSequentially(STsdbReader* pReader) { // load the last data block of current table STableBlockScanInfo* pScanInfo = *(STableBlockScanInfo**)pStatus->pTableIter; + if (pScanInfo == NULL) { + tsdbError("table Iter is null, invalid pScanInfo, try next table %s", pReader->idStr); + bool hasNexTable = moveToNextTable(pUidList, pStatus); + if (!hasNexTable) { + return TSDB_CODE_SUCCESS; + } + + continue; + } + if (pReader->pIgnoreTables && taosHashGet(*pReader->pIgnoreTables, &pScanInfo->uid, sizeof(pScanInfo->uid))) { // reset the index in last block when handing a new file doCleanupTableScanInfo(pScanInfo); @@ -3238,7 +3255,7 @@ static int32_t doLoadLastBlockSequentially(STsdbReader* pReader) { } static int32_t doBuildDataBlock(STsdbReader* pReader) { - int32_t code = TSDB_CODE_SUCCESS; + int32_t code = TSDB_CODE_SUCCESS; SReaderStatus* pStatus = &pReader->status; SDataBlockIter* pBlockIter = &pStatus->blockIter; @@ -3261,7 +3278,6 @@ static int32_t doBuildDataBlock(STsdbReader* pReader) { return terrno; } - 
initLastBlockReader(pLastBlockReader, pScanInfo, pReader); TSDBKEY keyInBuf = getCurrentKeyInBuf(pScanInfo, pReader); @@ -3338,7 +3354,7 @@ static int32_t doBuildDataBlock(STsdbReader* pReader) { } } - return (pReader->code != TSDB_CODE_SUCCESS)? pReader->code:code; + return (pReader->code != TSDB_CODE_SUCCESS) ? pReader->code : code; } static int32_t doSumFileBlockRows(STsdbReader* pReader, SDataFReader* pFileReader) { @@ -3493,14 +3509,15 @@ static int32_t buildBlockFromBufferSequentially(STsdbReader* pReader) { } STableBlockScanInfo** pBlockScanInfo = pStatus->pTableIter; - if (pReader->pIgnoreTables && taosHashGet(*pReader->pIgnoreTables, &(*pBlockScanInfo)->uid, sizeof((*pBlockScanInfo)->uid))) { + if (pReader->pIgnoreTables && + taosHashGet(*pReader->pIgnoreTables, &(*pBlockScanInfo)->uid, sizeof((*pBlockScanInfo)->uid))) { bool hasNexTable = moveToNextTable(pUidList, pStatus); if (!hasNexTable) { return TSDB_CODE_SUCCESS; } pBlockScanInfo = pStatus->pTableIter; } - + initMemDataIterator(*pBlockScanInfo, pReader); int64_t endKey = (ASCENDING_TRAVERSE(pReader->order)) ? 
INT64_MAX : INT64_MIN; @@ -3544,7 +3561,7 @@ static void initBlockDumpInfo(STsdbReader* pReader, SDataBlockIter* pBlockIter) static int32_t initForFirstBlockInFile(STsdbReader* pReader, SDataBlockIter* pBlockIter) { SBlockNumber num = {0}; - SArray* pTableList = taosArrayInit(40, POINTER_BYTES); + SArray* pTableList = taosArrayInit(40, POINTER_BYTES); int32_t code = moveToNextFile(pReader, &num, pTableList); if (code != TSDB_CODE_SUCCESS) { @@ -3589,7 +3606,7 @@ static ERetrieveType doReadDataFromLastFiles(STsdbReader* pReader) { SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; SDataBlockIter* pBlockIter = &pReader->status.blockIter; - while(1) { + while (1) { terrno = 0; code = doLoadLastBlockSequentially(pReader); @@ -3612,7 +3629,7 @@ static ERetrieveType doReadDataFromLastFiles(STsdbReader* pReader) { return TSDB_READ_RETURN; } - if (pBlockIter->numOfBlocks > 0) { // there are data blocks existed. + if (pBlockIter->numOfBlocks > 0) { // there are data blocks existed. return TSDB_READ_CONTINUE; } else { // all blocks in data file are checked, let's check the data in last files resetTableListIndex(&pReader->status); @@ -3625,7 +3642,7 @@ static int32_t buildBlockFromFiles(STsdbReader* pReader) { bool asc = ASCENDING_TRAVERSE(pReader->order); SDataBlockIter* pBlockIter = &pReader->status.blockIter; - SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; + SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; if (pBlockIter->numOfBlocks == 0) { // let's try to extract data from stt files. @@ -3737,13 +3754,14 @@ SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, const endVer = (pCond->endVersion > pVnode->state.applied) ? 
pVnode->state.applied : pCond->endVersion; } - tsdbDebug("queried verRange:%"PRId64"-%"PRId64", revised query verRange:%"PRId64"-%"PRId64", %s", pCond->startVersion, - pCond->endVersion, startVer, endVer, id); + tsdbDebug("queried verRange:%" PRId64 "-%" PRId64 ", revised query verRange:%" PRId64 "-%" PRId64 ", %s", + pCond->startVersion, pCond->endVersion, startVer, endVer, id); return (SVersionRange){.minVer = startVer, .maxVer = endVer}; } -bool hasBeenDropped(const SArray* pDelList, int32_t* index, int64_t key, int64_t ver, int32_t order, SVersionRange* pVerRange) { +bool hasBeenDropped(const SArray* pDelList, int32_t* index, int64_t key, int64_t ver, int32_t order, + SVersionRange* pVerRange) { if (pDelList == NULL) { return false; } @@ -3761,8 +3779,7 @@ bool hasBeenDropped(const SArray* pDelList, int32_t* index, int64_t key, int64_t return false; } else if (key == last->ts) { TSDBKEY* prev = taosArrayGet(pDelList, num - 2); - return (prev->version >= ver && prev->version <= pVerRange->maxVer && - prev->version >= pVerRange->minVer); + return (prev->version >= ver && prev->version <= pVerRange->maxVer && prev->version >= pVerRange->minVer); } } else { TSDBKEY* pCurrent = taosArrayGet(pDelList, *index); @@ -3971,9 +3988,9 @@ int32_t doMergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pSc SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; SRowMerger* pMerger = &pReader->status.merger; - bool asc = ASCENDING_TRAVERSE(pReader->order); - int64_t key = pBlockData->aTSKEY[pDumpInfo->rowIndex]; - int32_t step = asc ? 1 : -1; + bool asc = ASCENDING_TRAVERSE(pReader->order); + int64_t key = pBlockData->aTSKEY[pDumpInfo->rowIndex]; + int32_t step = asc ? 
1 : -1; pDumpInfo->rowIndex += step; if ((pDumpInfo->rowIndex <= pBlockData->nRow - 1 && asc) || (pDumpInfo->rowIndex >= 0 && !asc)) { @@ -4070,14 +4087,14 @@ int32_t doMergeMemTableMultiRows(TSDBROW* pRow, uint64_t uid, SIterInfo* pIter, return terrno; } - tsdbRowMergerAdd(&pReader->status.merger,pNextRow, pTSchema1); + tsdbRowMergerAdd(&pReader->status.merger, pNextRow, pTSchema1); } else { // let's merge rows in file block code = tsdbRowMergerAdd(&pReader->status.merger, ¤t, pReader->pSchema); if (code != TSDB_CODE_SUCCESS) { return code; } - tsdbRowMergerAdd(&pReader->status.merger,pNextRow, NULL); + tsdbRowMergerAdd(&pReader->status.merger, pNextRow, NULL); } code = doMergeRowsInBuf(pIter, uid, TSDBROW_TS(¤t), pDelList, pReader); @@ -4124,9 +4141,8 @@ int32_t doMergeMemIMemRows(TSDBROW* pRow, TSDBROW* piRow, STableBlockScanInfo* p return code; } - tsdbRowMergerAdd(&pReader->status.merger,pRow, pSchema); - code = - doMergeRowsInBuf(&pBlockScanInfo->iter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); + tsdbRowMergerAdd(&pReader->status.merger, pRow, pSchema); + code = doMergeRowsInBuf(&pBlockScanInfo->iter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -4365,7 +4381,7 @@ int32_t tsdbSetTableList(STsdbReader* pReader, const void* pTableList, int32_t n int32_t size = tSimpleHashGetSize(pReader->status.pTableMap); STableBlockScanInfo** p = NULL; - int32_t iter = 0; + int32_t iter = 0; while ((p = tSimpleHashIterate(pReader->status.pTableMap, p, &iter)) != NULL) { clearBlockScanInfo(*p); @@ -4452,15 +4468,16 @@ static int32_t doOpenReaderImpl(STsdbReader* pReader) { } static void freeSchemaFunc(void* param) { - void **p = (void **)param; + void** p = (void**)param; taosMemoryFreeClear(*p); } // ====================================== EXPOSED APIs ====================================== int32_t tsdbReaderOpen(void* pVnode, SQueryTableDataCond* pCond, void* pTableList, int32_t 
numOfTables, - SSDataBlock* pResBlock, void** ppReader, const char* idstr, bool countOnly, SHashObj** pIgnoreTables) { + SSDataBlock* pResBlock, void** ppReader, const char* idstr, bool countOnly, + SHashObj** pIgnoreTables) { STimeWindow window = pCond->twindows; - SVnodeCfg* pConf = &(((SVnode*)pVnode)->config); + SVnodeCfg* pConf = &(((SVnode*)pVnode)->config); int32_t capacity = pConf->tsdbCfg.maxRows; if (pResBlock != NULL) { @@ -4729,7 +4746,7 @@ int32_t tsdbReaderSuspend(STsdbReader* pReader) { // resetDataBlockScanInfo excluding lastKey STableBlockScanInfo** p = NULL; - int32_t iter = 0; + int32_t iter = 0; while ((p = tSimpleHashIterate(pStatus->pTableMap, p, &iter)) != NULL) { STableBlockScanInfo* pInfo = *(STableBlockScanInfo**)p; @@ -4751,7 +4768,7 @@ int32_t tsdbReaderSuspend(STsdbReader* pReader) { } else { // resetDataBlockScanInfo excluding lastKey STableBlockScanInfo** p = NULL; - int32_t iter = 0; + int32_t iter = 0; while ((p = tSimpleHashIterate(pStatus->pTableMap, p, &iter)) != NULL) { STableBlockScanInfo* pInfo = *(STableBlockScanInfo**)p; @@ -4950,8 +4967,9 @@ int32_t tsdbNextDataBlock(STsdbReader* pReader, bool* hasNext) { *hasNext = false; - if (isEmptyQueryTimeWindow(&pReader->window) || pReader->step == EXTERNAL_ROWS_NEXT || pReader->code != TSDB_CODE_SUCCESS) { - return (pReader->code != TSDB_CODE_SUCCESS)? pReader->code:code; + if (isEmptyQueryTimeWindow(&pReader->window) || pReader->step == EXTERNAL_ROWS_NEXT || + pReader->code != TSDB_CODE_SUCCESS) { + return (pReader->code != TSDB_CODE_SUCCESS) ? 
pReader->code : code; } SReaderStatus* pStatus = &pReader->status; @@ -5087,7 +5105,7 @@ static bool doFillNullColSMA(SBlockLoadSuppInfo* pSup, int32_t numOfRows, int32_ return hasNullSMA; } -int32_t tsdbRetrieveDatablockSMA(STsdbReader* pReader, SSDataBlock* pDataBlock, bool* allHave, bool *hasNullSMA) { +int32_t tsdbRetrieveDatablockSMA(STsdbReader* pReader, SSDataBlock* pDataBlock, bool* allHave, bool* hasNullSMA) { SColumnDataAgg*** pBlockSMA = &pDataBlock->pBlockAgg; int32_t code = 0; @@ -5196,9 +5214,9 @@ STableBlockScanInfo* getTableBlockScanInfo(SSHashObj* pTableMap, uint64_t uid, c } static SSDataBlock* doRetrieveDataBlock(STsdbReader* pReader) { - SReaderStatus* pStatus = &pReader->status; - int32_t code = TSDB_CODE_SUCCESS; - SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pStatus->blockIter); + SReaderStatus* pStatus = &pReader->status; + int32_t code = TSDB_CODE_SUCCESS; + SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pStatus->blockIter); if (pReader->code != TSDB_CODE_SUCCESS) { return NULL; diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c new file mode 100644 index 0000000000000000000000000000000000000000..e1756333c536e7158b823d078bffae6e79e76f92 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -0,0 +1,4950 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "osDef.h" +#include "tsdb.h" +#include "tsdbDataFileRW.h" +#include "tsdbFS2.h" +#include "tsdbMerge.h" +#include "tsdbReadUtil.h" +#include "tsdbUtil2.h" +#include "tsimplehash.h" + +#define ASCENDING_TRAVERSE(o) (o == TSDB_ORDER_ASC) +#define getCurrentKeyInLastBlock(_r) ((_r)->currentKey) + +static SFileDataBlockInfo* getCurrentBlockInfo(SDataBlockIter* pBlockIter); +static int32_t buildDataBlockFromBufImpl(STableBlockScanInfo* pBlockScanInfo, int64_t endKey, int32_t capacity, + STsdbReader* pReader); +static TSDBROW* getValidMemRow(SIterInfo* pIter, const SArray* pDelList, STsdbReader* pReader); +static int32_t doMergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pScanInfo, STsdbReader* pReader); +static int32_t doMergeRowsInLastBlock(SLastBlockReader* pLastBlockReader, STableBlockScanInfo* pScanInfo, int64_t ts, + SRowMerger* pMerger, SVersionRange* pVerRange, const char* id); +static int32_t doMergeRowsInBuf(SIterInfo* pIter, uint64_t uid, int64_t ts, SArray* pDelList, STsdbReader* pReader); +static int32_t doAppendRowFromTSRow(SSDataBlock* pBlock, STsdbReader* pReader, SRow* pTSRow, + STableBlockScanInfo* pScanInfo); +static int32_t doAppendRowFromFileBlock(SSDataBlock* pResBlock, STsdbReader* pReader, SBlockData* pBlockData, + int32_t rowIndex); +static void setComposedBlockFlag(STsdbReader* pReader, bool composed); +static bool hasBeenDropped(const SArray* pDelList, int32_t* index, int64_t key, int64_t ver, int32_t order, + SVersionRange* pVerRange); + +static int32_t doMergeMemTableMultiRows(TSDBROW* pRow, uint64_t uid, SIterInfo* pIter, SArray* pDelList, + TSDBROW* pResRow, STsdbReader* pReader, bool* freeTSRow); +static int32_t doMergeMemIMemRows(TSDBROW* pRow, TSDBROW* piRow, STableBlockScanInfo* pBlockScanInfo, + STsdbReader* pReader, SRow** pTSRow); +static int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pBlockScanInfo, int64_t key, + STsdbReader* pReader); + +static int32_t 
initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, int32_t order, SCostSummary* pCost); +static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idstr, + int8_t* pLevel); +static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level); +static bool hasDataInLastBlock(SLastBlockReader* pLastBlockReader); +static int32_t doBuildDataBlock(STsdbReader* pReader); +static TSDBKEY getCurrentKeyInBuf(STableBlockScanInfo* pScanInfo, STsdbReader* pReader); +static bool hasDataInFileBlock(const SBlockData* pBlockData, const SFileBlockDumpInfo* pDumpInfo); +static void initBlockDumpInfo(STsdbReader* pReader, SDataBlockIter* pBlockIter); +static int32_t getInitialDelIndex(const SArray* pDelSkyline, int32_t order); + +static bool outOfTimeWindow(int64_t ts, STimeWindow* pWindow) { return (ts > pWindow->ekey) || (ts < pWindow->skey); } + +static int32_t setColumnIdSlotList(SBlockLoadSuppInfo* pSupInfo, SColumnInfo* pCols, const int32_t* pSlotIdList, + int32_t numOfCols) { + pSupInfo->smaValid = true; + pSupInfo->numOfCols = numOfCols; + pSupInfo->colId = taosMemoryMalloc(numOfCols * (sizeof(int16_t) * 2 + POINTER_BYTES)); + if (pSupInfo->colId == NULL) { + taosMemoryFree(pSupInfo->colId); + return TSDB_CODE_OUT_OF_MEMORY; + } + + pSupInfo->slotId = (int16_t*)((char*)pSupInfo->colId + (sizeof(int16_t) * numOfCols)); + pSupInfo->buildBuf = (char**)((char*)pSupInfo->slotId + (sizeof(int16_t) * numOfCols)); + for (int32_t i = 0; i < numOfCols; ++i) { + pSupInfo->colId[i] = pCols[i].colId; + pSupInfo->slotId[i] = pSlotIdList[i]; + + if (IS_VAR_DATA_TYPE(pCols[i].type)) { + pSupInfo->buildBuf[i] = taosMemoryMalloc(pCols[i].bytes); + } else { + pSupInfo->buildBuf[i] = NULL; + } + } + + return TSDB_CODE_SUCCESS; +} + +static int32_t updateBlockSMAInfo(STSchema* pSchema, SBlockLoadSuppInfo* pSupInfo) { + int32_t i = 0, j = 0; + + while (i < pSchema->numOfCols && j < pSupInfo->numOfCols) { + STColumn* 
pTCol = &pSchema->columns[i]; + if (pTCol->colId == pSupInfo->colId[j]) { + if (!IS_BSMA_ON(pTCol)) { + pSupInfo->smaValid = false; + return TSDB_CODE_SUCCESS; + } + + i += 1; + j += 1; + } else if (pTCol->colId < pSupInfo->colId[j]) { // do nothing + i += 1; + } else { + return TSDB_CODE_INVALID_PARA; + } + } + + return TSDB_CODE_SUCCESS; +} + +static bool isEmptyQueryTimeWindow(STimeWindow* pWindow) { return pWindow->skey > pWindow->ekey; } + +// Update the query time window according to the data time to live(TTL) information, in order to avoid to return +// the expired data to client, even it is queried already. +static STimeWindow updateQueryTimeWindow(STsdb* pTsdb, STimeWindow* pWindow) { + STsdbKeepCfg* pCfg = &pTsdb->keepCfg; + + int64_t now = taosGetTimestamp(pCfg->precision); + int64_t earilyTs = now - (tsTickPerMin[pCfg->precision] * pCfg->keep2) + 1; // needs to add one tick + + STimeWindow win = *pWindow; + if (win.skey < earilyTs) { + win.skey = earilyTs; + } + + return win; +} + +// init file iterator +static int32_t initFilesetIterator(SFilesetIter* pIter, TFileSetArray* pFileSetArray, STsdbReader* pReader) { + size_t numOfFileset = TARRAY2_SIZE(pFileSetArray); + + pIter->index = ASCENDING_TRAVERSE(pReader->info.order) ? 
-1 : numOfFileset; + pIter->order = pReader->info.order; + pIter->pFilesetList = pFileSetArray; + pIter->numOfFiles = numOfFileset; + + if (pIter->pLastBlockReader == NULL) { + pIter->pLastBlockReader = taosMemoryCalloc(1, sizeof(struct SLastBlockReader)); + if (pIter->pLastBlockReader == NULL) { + int32_t code = TSDB_CODE_OUT_OF_MEMORY; + tsdbError("failed to prepare the last block iterator, since:%s %s", tstrerror(code), pReader->idStr); + return code; + } + } + + SLastBlockReader* pLReader = pIter->pLastBlockReader; + pLReader->order = pReader->info.order; + pLReader->window = pReader->info.window; + pLReader->verRange = pReader->info.verRange; + + pLReader->uid = 0; + tMergeTreeClose(&pLReader->mergeTree); + tsdbDebug("init fileset iterator, total files:%d %s", pIter->numOfFiles, pReader->idStr); + return TSDB_CODE_SUCCESS; +} + +static int32_t filesetIteratorNext(SFilesetIter* pIter, STsdbReader* pReader, bool* hasNext) { + bool asc = ASCENDING_TRAVERSE(pIter->order); + int32_t step = asc ? 
1 : -1; + int32_t code = 0; + + pIter->index += step; + if ((asc && pIter->index >= pIter->numOfFiles) || ((!asc) && pIter->index < 0)) { + *hasNext = false; + return TSDB_CODE_SUCCESS; + } + + SCostSummary* pSum = &pReader->cost; + + pIter->pLastBlockReader->uid = 0; + tMergeTreeClose(&pIter->pLastBlockReader->mergeTree); + + pReader->status.pLDataIterArray = + destroySttBlockReader(pReader->status.pLDataIterArray, &pSum->lastBlockLoad, &pSum->lastBlockLoadTime); + pReader->status.pLDataIterArray = taosArrayInit(4, POINTER_BYTES); + + // check file the time range of coverage + STimeWindow win = {0}; + + while (1) { + if (pReader->pFileReader != NULL) { + tsdbDataFileReaderClose(&pReader->pFileReader); + } + + pReader->status.pCurrentFileset = pIter->pFilesetList->data[pIter->index]; + + STFileObj** pFileObj = pReader->status.pCurrentFileset->farr; + if (pFileObj[0] != NULL || pFileObj[3] != NULL) { + SDataFileReaderConfig conf = {.tsdb = pReader->pTsdb, .szPage = pReader->pTsdb->pVnode->config.tsdbPageSize}; + + const char* filesName[4] = {0}; + + if (pFileObj[0] != NULL) { + conf.files[0].file = *pFileObj[0]->f; + conf.files[0].exist = true; + filesName[0] = pFileObj[0]->fname; + + conf.files[1].file = *pFileObj[1]->f; + conf.files[1].exist = true; + filesName[1] = pFileObj[1]->fname; + + conf.files[2].file = *pFileObj[2]->f; + conf.files[2].exist = true; + filesName[2] = pFileObj[2]->fname; + } + + if (pFileObj[3] != NULL) { + conf.files[3].exist = true; + conf.files[3].file = *pFileObj[3]->f; + filesName[3] = pFileObj[3]->fname; + } + + code = tsdbDataFileReaderOpen(filesName, &conf, &pReader->pFileReader); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } + + pReader->cost.headFileLoad += 1; + } + + int32_t fid = pReader->status.pCurrentFileset->fid; + tsdbFidKeyRange(fid, pReader->pTsdb->keepCfg.days, pReader->pTsdb->keepCfg.precision, &win.skey, &win.ekey); + + // current file are no longer overlapped with query time window, ignore remain files + if ((asc 
&& win.skey > pReader->info.window.ekey) || (!asc && win.ekey < pReader->info.window.skey)) { + tsdbDebug("%p remain files are not qualified for qrange:%" PRId64 "-%" PRId64 ", ignore, %s", pReader, + pReader->info.window.skey, pReader->info.window.ekey, pReader->idStr); + *hasNext = false; + return TSDB_CODE_SUCCESS; + } + + if ((asc && (win.ekey < pReader->info.window.skey)) || ((!asc) && (win.skey > pReader->info.window.ekey))) { + pIter->index += step; + if ((asc && pIter->index >= pIter->numOfFiles) || ((!asc) && pIter->index < 0)) { + *hasNext = false; + return TSDB_CODE_SUCCESS; + } + continue; + } + + tsdbDebug("%p file found fid:%d for qrange:%" PRId64 "-%" PRId64 ", %s", pReader, fid, pReader->info.window.skey, + pReader->info.window.ekey, pReader->idStr); + *hasNext = true; + return TSDB_CODE_SUCCESS; + } + +_err: + *hasNext = false; + return code; +} + +static void resetDataBlockIterator(SDataBlockIter* pIter, int32_t order) { + pIter->order = order; + pIter->index = -1; + pIter->numOfBlocks = 0; + if (pIter->blockList == NULL) { + pIter->blockList = taosArrayInit(4, sizeof(SFileDataBlockInfo)); + } else { + taosArrayClear(pIter->blockList); + } +} + +static void cleanupDataBlockIterator(SDataBlockIter* pIter) { taosArrayDestroy(pIter->blockList); } + +static void initReaderStatus(SReaderStatus* pStatus) { + pStatus->pTableIter = NULL; + pStatus->loadFromFile = true; +} + +static SSDataBlock* createResBlock(SQueryTableDataCond* pCond, int32_t capacity) { + SSDataBlock* pResBlock = createDataBlock(); + if (pResBlock == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + + for (int32_t i = 0; i < pCond->numOfCols; ++i) { + SColumnInfoData colInfo = {0}; + colInfo.info = pCond->colList[i]; + blockDataAppendColInfo(pResBlock, &colInfo); + } + + int32_t code = blockDataEnsureCapacity(pResBlock, capacity); + if (code != TSDB_CODE_SUCCESS) { + terrno = code; + taosMemoryFree(pResBlock); + return NULL; + } + return pResBlock; +} + +static int32_t 
// The five helpers below wrap one taosThreadMutex* call on pReader->readerMutex each. Every helper traces once
// before the call (reporting the placeholder code -1) and once after it (reporting the real result), then returns
// that result unchanged.

// Initialize the reader mutex.
static int32_t tsdbInitReaderLock(STsdbReader* pReader) {
  const int32_t pending = -1;
  qTrace("tsdb/read: %p, pre-init read mutex: %p, code: %d", pReader, &pReader->readerMutex, pending);

  int32_t rc = taosThreadMutexInit(&pReader->readerMutex, NULL);
  qTrace("tsdb/read: %p, post-init read mutex: %p, code: %d", pReader, &pReader->readerMutex, rc);
  return rc;
}

// Destroy the reader mutex.
static int32_t tsdbUninitReaderLock(STsdbReader* pReader) {
  const int32_t pending = -1;
  qTrace("tsdb/read: %p, pre-uninit read mutex: %p, code: %d", pReader, &pReader->readerMutex, pending);

  int32_t rc = taosThreadMutexDestroy(&pReader->readerMutex);
  qTrace("tsdb/read: %p, post-uninit read mutex: %p, code: %d", pReader, &pReader->readerMutex, rc);
  return rc;
}

// Lock the reader mutex.
static int32_t tsdbAcquireReader(STsdbReader* pReader) {
  const int32_t pending = -1;
  qTrace("tsdb/read: %p, pre-take read mutex: %p, code: %d", pReader, &pReader->readerMutex, pending);

  int32_t rc = taosThreadMutexLock(&pReader->readerMutex);
  qTrace("tsdb/read: %p, post-take read mutex: %p, code: %d", pReader, &pReader->readerMutex, rc);
  return rc;
}

// Try to lock the reader mutex; the result of the try-lock call is returned as-is.
static int32_t tsdbTryAcquireReader(STsdbReader* pReader) {
  const int32_t pending = -1;
  qTrace("tsdb/read: %p, pre-trytake read mutex: %p, code: %d", pReader, &pReader->readerMutex, pending);

  int32_t rc = taosThreadMutexTryLock(&pReader->readerMutex);
  qTrace("tsdb/read: %p, post-trytake read mutex: %p, code: %d", pReader, &pReader->readerMutex, rc);
  return rc;
}

// Unlock the reader mutex.
static int32_t tsdbReleaseReader(STsdbReader* pReader) {
  const int32_t pending = -1;
  qTrace("tsdb/read: %p, pre-untake read mutex: %p, code: %d", pReader, &pReader->readerMutex, pending);

  int32_t rc = taosThreadMutexUnlock(&pReader->readerMutex);
  qTrace("tsdb/read: %p, post-untake read mutex: %p, code: %d", pReader, &pReader->readerMutex, rc);
  return rc;
}

// Unlock the reader mutex unless the current result block is a composed one; for a composed block nothing is done.
void tsdbReleaseDataBlock2(STsdbReader* pReader) {
  if (pReader->status.composedDataBlock) {
    return;
  }

  tsdbReleaseReader(pReader);
}
int32_t initResBlockInfo(SResultBlockInfo* pResBlockInfo, int64_t capacity, SSDataBlock* pResBlock, + SQueryTableDataCond* pCond) { + pResBlockInfo->capacity = capacity; + pResBlockInfo->pResBlock = pResBlock; + terrno = 0; + + if (pResBlockInfo->pResBlock == NULL) { + pResBlockInfo->freeBlock = true; + pResBlockInfo->pResBlock = createResBlock(pCond, pResBlockInfo->capacity); + } else { + pResBlockInfo->freeBlock = false; + } + + return terrno; +} + +static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, void** ppReader, int32_t capacity, + SSDataBlock* pResBlock, const char* idstr) { + int32_t code = 0; + int8_t level = 0; + STsdbReader* pReader = (STsdbReader*)taosMemoryCalloc(1, sizeof(*pReader)); + if (pReader == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _end; + } + + if (VND_IS_TSMA(pVnode)) { + tsdbDebug("vgId:%d, tsma is selected to query, %s", TD_VID(pVnode), idstr); + } + + initReaderStatus(&pReader->status); + + pReader->pTsdb = getTsdbByRetentions(pVnode, pCond->twindows.skey, pVnode->config.tsdbCfg.retentions, idstr, &level); + pReader->info.suid = pCond->suid; + pReader->info.order = pCond->order; + + pReader->idStr = (idstr != NULL) ? 
taosStrdup(idstr) : NULL; + pReader->info.verRange = getQueryVerRange(pVnode, pCond, level); + pReader->type = pCond->type; + pReader->info.window = updateQueryTimeWindow(pReader->pTsdb, &pCond->twindows); + pReader->blockInfoBuf.numPerBucket = 1000; // 1000 tables per bucket + + code = initResBlockInfo(&pReader->resBlockInfo, capacity, pResBlock, pCond); + if (code != TSDB_CODE_SUCCESS) { + goto _end; + } + + if (pCond->numOfCols <= 0) { + tsdbError("vgId:%d, invalid column number %d in query cond, %s", TD_VID(pVnode), pCond->numOfCols, idstr); + code = TSDB_CODE_INVALID_PARA; + goto _end; + } + + // allocate buffer in order to load data blocks from file + SBlockLoadSuppInfo* pSup = &pReader->suppInfo; + pSup->tsColAgg.colId = PRIMARYKEY_TIMESTAMP_COL_ID; + setColumnIdSlotList(pSup, pCond->colList, pCond->pSlotList, pCond->numOfCols); + + code = tBlockDataCreate(&pReader->status.fileBlockData); + if (code != TSDB_CODE_SUCCESS) { + terrno = code; + goto _end; + } + + if (pReader->suppInfo.colId[0] != PRIMARYKEY_TIMESTAMP_COL_ID) { + tsdbError("the first column isn't primary timestamp, %d, %s", pReader->suppInfo.colId[0], pReader->idStr); + code = TSDB_CODE_INVALID_PARA; + goto _end; + } + + pReader->status.pPrimaryTsCol = taosArrayGet(pReader->resBlockInfo.pResBlock->pDataBlock, pSup->slotId[0]); + int32_t type = pReader->status.pPrimaryTsCol->info.type; + if (type != TSDB_DATA_TYPE_TIMESTAMP) { + tsdbError("the first column isn't primary timestamp in result block, actual: %s, %s", tDataTypes[type].name, + pReader->idStr); + code = TSDB_CODE_INVALID_PARA; + goto _end; + } + + tsdbInitReaderLock(pReader); + + *ppReader = pReader; + return code; + +_end: + tsdbReaderClose(pReader); + *ppReader = NULL; + return code; +} + +static int32_t doLoadBlockIndex(STsdbReader* pReader, SDataFileReader* pFileReader, SArray* pIndexList) { + int64_t st = taosGetTimestampUs(); + int32_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap); + if (pFileReader == NULL) { + 
return TSDB_CODE_SUCCESS; + } + + const TBrinBlkArray* pBlkArray = NULL; + + int32_t code = tsdbDataFileReadBrinBlk(pFileReader, &pBlkArray); + +#if 0 + LRUHandle* handle = NULL; + + int32_t code = tsdbCacheGetBlockIdx(pFileReader->pTsdb->biCache, pFileReader, &handle); + if (code != TSDB_CODE_SUCCESS || handle == NULL) { + goto _end; + } + + + SArray* aBlockIdx = (SArray*)taosLRUCacheValue(pFileReader->pTsdb->biCache, handle); + size_t num = taosArrayGetSize(aBlockIdx); + if (num == 0) { + tsdbBICacheRelease(pFileReader->pTsdb->biCache, handle); + return TSDB_CODE_SUCCESS; + } +#endif + + // todo binary search to the start position + int64_t et1 = taosGetTimestampUs(); + + SBrinBlk* pBrinBlk = NULL; + STableUidList* pList = &pReader->status.uidList; + + int32_t i = 0; + + while (i < TARRAY2_SIZE(pBlkArray)) { + pBrinBlk = &pBlkArray->data[i]; + if (pBrinBlk->maxTbid.suid < pReader->info.suid) { + i += 1; + continue; + } + + if (pBrinBlk->minTbid.suid > pReader->info.suid) { // not include the queried table/super table, quit the loop + break; + } + + ASSERT(pBrinBlk->minTbid.suid <= pReader->info.suid && pBrinBlk->maxTbid.suid >= pReader->info.suid); + if (pBrinBlk->maxTbid.suid == pReader->info.suid && pBrinBlk->maxTbid.uid < pList->tableUidList[0]) { + i += 1; + continue; + } + + if (pBrinBlk->minTbid.suid == pReader->info.suid && pBrinBlk->minTbid.uid > pList->tableUidList[numOfTables - 1]) { + break; + } + + taosArrayPush(pIndexList, pBrinBlk); + i += 1; + } + + int64_t et2 = taosGetTimestampUs(); + tsdbDebug("load block index for %d/%d tables completed, elapsed time:%.2f ms, set BrinBlk:%.2f ms, size:%.2f Kb %s", + numOfTables, (int32_t)pBlkArray->size, (et1 - st) / 1000.0, (et2 - et1) / 1000.0, + pBlkArray->size * sizeof(SBrinBlk) / 1024.0, pReader->idStr); + + pReader->cost.headFileLoadTime += (et1 - st) / 1000.0; + +_end: + // tsdbBICacheRelease(pFileReader->pTsdb->biCache, handle); + return code; +} + +static int32_t doLoadFileBlock(STsdbReader* pReader, 
SArray* pIndexList, SBlockNumber* pBlockNum, + SArray* pTableScanInfoList) { + size_t sizeInDisk = 0; + int64_t st = taosGetTimestampUs(); + + // clear info for the new file + cleanupInfoFoxNextFileset(pReader->status.pTableMap); + + int32_t k = 0; + int32_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap); + int32_t step = ASCENDING_TRAVERSE(pReader->info.order) ? 1 : -1; + STimeWindow w = pReader->info.window; + SBrinRecord* pRecord = NULL; + + SBrinRecordIter iter = {0}; + initBrinRecordIter(&iter, pReader->pFileReader, pIndexList); + + while (((pRecord = getNextBrinRecord(&iter)) != NULL)) { + if (pRecord->suid > pReader->info.suid) { + break; + } + + uint64_t uid = pReader->status.uidList.tableUidList[k]; + if (pRecord->suid < pReader->info.suid) { + continue; + } + + if (uid < pRecord->uid) { // forward the table uid index + while (k < numOfTables && pReader->status.uidList.tableUidList[k] < pRecord->uid) { + k += 1; + } + + if (k >= numOfTables) { + break; + } + + uid = pReader->status.uidList.tableUidList[k]; + } + + if (pRecord->uid < uid) { + continue; + } + + ASSERT(pRecord->suid == pReader->info.suid && uid == pRecord->uid); + + STableBlockScanInfo* pScanInfo = getTableBlockScanInfo(pReader->status.pTableMap, uid, pReader->idStr); + if (ASCENDING_TRAVERSE(pReader->info.order)) { + w.skey = pScanInfo->lastKey + step; + } else { + w.ekey = pScanInfo->lastKey + step; + } + + if (isEmptyQueryTimeWindow(&w)) { + k += 1; + continue; + } + + // 1. time range check + if (pRecord->firstKey > w.ekey || pRecord->lastKey < w.skey) { + continue; + } + + // 2. 
version range check + if (pRecord->minVer > pReader->info.verRange.maxVer || pRecord->maxVer < pReader->info.verRange.minVer) { + continue; + } + + if (pScanInfo->pBlockList == NULL) { + pScanInfo->pBlockList = taosArrayInit(4, sizeof(SBrinRecord)); + } + + void* p1 = taosArrayPush(pScanInfo->pBlockList, pRecord); + if (p1 == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + pBlockNum->numOfBlocks += 1; + if (taosArrayGetSize(pTableScanInfoList) == 0) { + taosArrayPush(pTableScanInfoList, &pScanInfo); + } else { + STableBlockScanInfo** p = taosArrayGetLast(pTableScanInfoList); + if ((*p)->uid != uid) { + taosArrayPush(pTableScanInfoList, &pScanInfo); + } + } + } + + clearBrinBlockIter(&iter); + + pBlockNum->numOfLastFiles = pReader->status.pCurrentFileset->lvlArr->size; + int32_t total = pBlockNum->numOfLastFiles + pBlockNum->numOfBlocks; + + double el = (taosGetTimestampUs() - st) / 1000.0; + tsdbDebug( + "load block of %d tables completed, blocks:%d in %d tables, last-files:%d, block-info-size:%.2f Kb, elapsed " + "time:%.2f ms %s", + numOfTables, pBlockNum->numOfBlocks, (int32_t)taosArrayGetSize(pTableScanInfoList), pBlockNum->numOfLastFiles, + sizeInDisk / 1000.0, el, pReader->idStr); + + pReader->cost.numOfBlocks += total; + pReader->cost.headFileLoadTime += el; + + return TSDB_CODE_SUCCESS; +} + +static void setBlockAllDumped(SFileBlockDumpInfo* pDumpInfo, int64_t maxKey, int32_t order) { + int32_t step = ASCENDING_TRAVERSE(order) ? 
1 : -1; + pDumpInfo->allDumped = true; + pDumpInfo->lastKey = maxKey + step; +} + +static int32_t doCopyColVal(SColumnInfoData* pColInfoData, int32_t rowIndex, int32_t colIndex, SColVal* pColVal, + SBlockLoadSuppInfo* pSup) { + if (IS_VAR_DATA_TYPE(pColVal->type)) { + if (!COL_VAL_IS_VALUE(pColVal)) { + colDataSetNULL(pColInfoData, rowIndex); + } else { + varDataSetLen(pSup->buildBuf[colIndex], pColVal->value.nData); + if (pColVal->value.nData > pColInfoData->info.bytes) { + tsdbWarn("column cid:%d actual data len %d is bigger than schema len %d", pColVal->cid, pColVal->value.nData, + pColInfoData->info.bytes); + return TSDB_CODE_TDB_INVALID_TABLE_SCHEMA_VER; + } + if (pColVal->value.nData > 0) { // pData may be null, if nData is 0 + memcpy(varDataVal(pSup->buildBuf[colIndex]), pColVal->value.pData, pColVal->value.nData); + } + + colDataSetVal(pColInfoData, rowIndex, pSup->buildBuf[colIndex], false); + } + } else { + colDataSetVal(pColInfoData, rowIndex, (const char*)&pColVal->value, !COL_VAL_IS_VALUE(pColVal)); + } + + return TSDB_CODE_SUCCESS; +} + +static SFileDataBlockInfo* getCurrentBlockInfo(SDataBlockIter* pBlockIter) { + size_t num = TARRAY_SIZE(pBlockIter->blockList); + if (num == 0) { + ASSERT(pBlockIter->numOfBlocks == num); + return NULL; + } + + SFileDataBlockInfo* pBlockInfo = taosArrayGet(pBlockIter->blockList, pBlockIter->index); + return pBlockInfo; +} + +static int doBinarySearchKey(TSKEY* keyList, int num, int pos, TSKEY key, int order) { + // start end position + int s, e; + s = pos; + + // check + ASSERT(pos >= 0 && pos < num && num > 0); + if (order == TSDB_ORDER_ASC) { + // find the first position which is smaller than the key + e = num - 1; + if (key < keyList[pos]) return -1; + while (1) { + // check can return + if (key >= keyList[e]) return e; + if (key <= keyList[s]) return s; + if (e - s <= 1) return s; + + // change start or end position + int mid = s + (e - s + 1) / 2; + if (keyList[mid] > key) + e = mid; + else if (keyList[mid] < 
key) + s = mid; + else + return mid; + } + } else { // DESC + // find the first position which is bigger than the key + e = 0; + if (key > keyList[pos]) return -1; + while (1) { + // check can return + if (key <= keyList[e]) return e; + if (key >= keyList[s]) return s; + if (s - e <= 1) return s; + + // change start or end position + int mid = s - (s - e + 1) / 2; + if (keyList[mid] < key) + e = mid; + else if (keyList[mid] > key) + s = mid; + else + return mid; + } + } +} + +static int32_t getEndPosInDataBlock(STsdbReader* pReader, SBlockData* pBlockData, SBrinRecord* pRecord, int32_t pos) { + // NOTE: reverse the order to find the end position in data block + int32_t endPos = -1; + bool asc = ASCENDING_TRAVERSE(pReader->info.order); + + if (asc && pReader->info.window.ekey >= pRecord->lastKey) { + endPos = pRecord->numRow - 1; + } else if (!asc && pReader->info.window.skey <= pRecord->firstKey) { + endPos = 0; + } else { + int64_t key = asc ? pReader->info.window.ekey : pReader->info.window.skey; + endPos = doBinarySearchKey(pBlockData->aTSKEY, pRecord->numRow, pos, key, pReader->info.order); + } + + if ((pReader->info.verRange.maxVer >= pRecord->minVer && pReader->info.verRange.maxVer < pRecord->maxVer) || + (pReader->info.verRange.minVer <= pRecord->maxVer && pReader->info.verRange.minVer > pRecord->minVer)) { + int32_t i = endPos; + + if (asc) { + for (; i >= 0; --i) { + if (pBlockData->aVersion[i] <= pReader->info.verRange.maxVer) { + break; + } + } + } else { + for (; i < pRecord->numRow; ++i) { + if (pBlockData->aVersion[i] >= pReader->info.verRange.minVer) { + break; + } + } + } + + endPos = i; + } + + return endPos; +} + +static void copyPrimaryTsCol(const SBlockData* pBlockData, SFileBlockDumpInfo* pDumpInfo, SColumnInfoData* pColData, + int32_t dumpedRows, bool asc) { + if (asc) { + memcpy(pColData->pData, &pBlockData->aTSKEY[pDumpInfo->rowIndex], dumpedRows * sizeof(int64_t)); + } else { + int32_t startIndex = pDumpInfo->rowIndex - dumpedRows + 1; + 
memcpy(pColData->pData, &pBlockData->aTSKEY[startIndex], dumpedRows * sizeof(int64_t)); + + // todo: opt perf by extract the loop + // reverse the array list + int32_t mid = dumpedRows >> 1u; + int64_t* pts = (int64_t*)pColData->pData; + for (int32_t j = 0; j < mid; ++j) { + int64_t t = pts[j]; + pts[j] = pts[dumpedRows - j - 1]; + pts[dumpedRows - j - 1] = t; + } + } +} + +// a faster version of copy procedure. +static void copyNumericCols(const SColData* pData, SFileBlockDumpInfo* pDumpInfo, SColumnInfoData* pColData, + int32_t dumpedRows, bool asc) { + uint8_t* p = NULL; + if (asc) { + p = pData->pData + tDataTypes[pData->type].bytes * pDumpInfo->rowIndex; + } else { + int32_t startIndex = pDumpInfo->rowIndex - dumpedRows + 1; + p = pData->pData + tDataTypes[pData->type].bytes * startIndex; + } + + int32_t step = asc ? 1 : -1; + + // make sure it is aligned to 8bit, the allocated memory address is aligned to 256bit + // ASSERT((((uint64_t)pColData->pData) & (0x8 - 1)) == 0); + + // 1. copy data in a batch model + memcpy(pColData->pData, p, dumpedRows * tDataTypes[pData->type].bytes); + + // 2. 
reverse the array list in case of descending order scan data block + if (!asc) { + switch (pColData->info.type) { + case TSDB_DATA_TYPE_TIMESTAMP: + case TSDB_DATA_TYPE_DOUBLE: + case TSDB_DATA_TYPE_BIGINT: + case TSDB_DATA_TYPE_UBIGINT: { + int32_t mid = dumpedRows >> 1u; + int64_t* pts = (int64_t*)pColData->pData; + for (int32_t j = 0; j < mid; ++j) { + int64_t t = pts[j]; + pts[j] = pts[dumpedRows - j - 1]; + pts[dumpedRows - j - 1] = t; + } + break; + } + + case TSDB_DATA_TYPE_BOOL: + case TSDB_DATA_TYPE_TINYINT: + case TSDB_DATA_TYPE_UTINYINT: { + int32_t mid = dumpedRows >> 1u; + int8_t* pts = (int8_t*)pColData->pData; + for (int32_t j = 0; j < mid; ++j) { + int8_t t = pts[j]; + pts[j] = pts[dumpedRows - j - 1]; + pts[dumpedRows - j - 1] = t; + } + break; + } + + case TSDB_DATA_TYPE_SMALLINT: + case TSDB_DATA_TYPE_USMALLINT: { + int32_t mid = dumpedRows >> 1u; + int16_t* pts = (int16_t*)pColData->pData; + for (int32_t j = 0; j < mid; ++j) { + int64_t t = pts[j]; + pts[j] = pts[dumpedRows - j - 1]; + pts[dumpedRows - j - 1] = t; + } + break; + } + + case TSDB_DATA_TYPE_FLOAT: + case TSDB_DATA_TYPE_INT: + case TSDB_DATA_TYPE_UINT: { + int32_t mid = dumpedRows >> 1u; + int32_t* pts = (int32_t*)pColData->pData; + for (int32_t j = 0; j < mid; ++j) { + int32_t t = pts[j]; + pts[j] = pts[dumpedRows - j - 1]; + pts[dumpedRows - j - 1] = t; + } + break; + } + } + } + + // 3. 
if the null value exists, check items one-by-one + if (pData->flag != HAS_VALUE) { + int32_t rowIndex = 0; + + for (int32_t j = pDumpInfo->rowIndex; rowIndex < dumpedRows; j += step, rowIndex++) { + uint8_t v = tColDataGetBitValue(pData, j); + if (v == 0 || v == 1) { + colDataSetNull_f(pColData->nullbitmap, rowIndex); + pColData->hasNull = true; + } + } + } +} + +static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader) { + SReaderStatus* pStatus = &pReader->status; + SDataBlockIter* pBlockIter = &pStatus->blockIter; + SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo; + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + + SBlockData* pBlockData = &pStatus->fileBlockData; + SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(pBlockIter); + SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; + int32_t numOfOutputCols = pSupInfo->numOfCols; + int32_t code = TSDB_CODE_SUCCESS; + + SColVal cv = {0}; + int64_t st = taosGetTimestampUs(); + bool asc = ASCENDING_TRAVERSE(pReader->info.order); + int32_t step = asc ? 1 : -1; + + SBrinRecord* pRecord = &pBlockInfo->record; + + // no data exists, return directly. + if (pBlockData->nRow == 0 || pBlockData->aTSKEY == 0) { + tsdbWarn("%p no need to copy since no data in blockData, table uid:%" PRIu64 " has been dropped, %s", pReader, + pBlockInfo->uid, pReader->idStr); + pResBlock->info.rows = 0; + return 0; + } + + // row index of dump info remain the initial position, let's find the appropriate start position. 
+ if ((pDumpInfo->rowIndex == 0 && asc) || (pDumpInfo->rowIndex == pRecord->numRow - 1 && (!asc))) { + if (asc && pReader->info.window.skey <= pRecord->firstKey && pReader->info.verRange.minVer <= pRecord->minVer) { + // pDumpInfo->rowIndex = 0; + } else if (!asc && pReader->info.window.ekey >= pRecord->lastKey && + pReader->info.verRange.maxVer >= pRecord->maxVer) { + // pDumpInfo->rowIndex = pRecord->numRow - 1; + } else { // find the appropriate the start position in current block, and set it to be the current rowIndex + int32_t pos = asc ? pRecord->numRow - 1 : 0; + int32_t order = asc ? TSDB_ORDER_DESC : TSDB_ORDER_ASC; + int64_t key = asc ? pReader->info.window.skey : pReader->info.window.ekey; + pDumpInfo->rowIndex = doBinarySearchKey(pBlockData->aTSKEY, pRecord->numRow, pos, key, order); + + if (pDumpInfo->rowIndex < 0) { + tsdbError( + "%p failed to locate the start position in current block, global index:%d, table index:%d, brange:%" PRId64 + "-%" PRId64 ", minVer:%" PRId64 ", maxVer:%" PRId64 " %s", + pReader, pBlockIter->index, pBlockInfo->tbBlockIdx, pRecord->firstKey, pRecord->lastKey, pRecord->minVer, + pRecord->maxVer, pReader->idStr); + return TSDB_CODE_INVALID_PARA; + } + + ASSERT(pReader->info.verRange.minVer <= pRecord->maxVer && pReader->info.verRange.maxVer >= pRecord->minVer); + + // find the appropriate start position that satisfies the version requirement. 
+ if ((pReader->info.verRange.maxVer >= pRecord->minVer && pReader->info.verRange.maxVer < pRecord->maxVer) || + (pReader->info.verRange.minVer <= pRecord->maxVer && pReader->info.verRange.minVer > pRecord->minVer)) { + int32_t i = pDumpInfo->rowIndex; + if (asc) { + for (; i < pRecord->numRow; ++i) { + if (pBlockData->aVersion[i] >= pReader->info.verRange.minVer) { + break; + } + } + } else { + for (; i >= 0; --i) { + if (pBlockData->aVersion[i] <= pReader->info.verRange.maxVer) { + break; + } + } + } + + pDumpInfo->rowIndex = i; + } + } + } + + // time window check + int32_t endIndex = getEndPosInDataBlock(pReader, pBlockData, pRecord, pDumpInfo->rowIndex); + if (endIndex == -1) { + setBlockAllDumped(pDumpInfo, pReader->info.window.ekey, pReader->info.order); + return TSDB_CODE_SUCCESS; + } + + endIndex += step; + int32_t dumpedRows = asc ? (endIndex - pDumpInfo->rowIndex) : (pDumpInfo->rowIndex - endIndex); + if (dumpedRows > pReader->resBlockInfo.capacity) { // output buffer check + dumpedRows = pReader->resBlockInfo.capacity; + } else if (dumpedRows <= 0) { // no qualified rows in current data block, abort directly. 
+ setBlockAllDumped(pDumpInfo, pReader->info.window.ekey, pReader->info.order); + return TSDB_CODE_SUCCESS; + } + + int32_t i = 0; + int32_t rowIndex = 0; + + SColumnInfoData* pColData = taosArrayGet(pResBlock->pDataBlock, pSupInfo->slotId[i]); + if (pSupInfo->colId[i] == PRIMARYKEY_TIMESTAMP_COL_ID) { + copyPrimaryTsCol(pBlockData, pDumpInfo, pColData, dumpedRows, asc); + i += 1; + } + + int32_t colIndex = 0; + int32_t num = pBlockData->nColData; + while (i < numOfOutputCols && colIndex < num) { + rowIndex = 0; + + SColData* pData = tBlockDataGetColDataByIdx(pBlockData, colIndex); + if (pData->cid < pSupInfo->colId[i]) { + colIndex += 1; + } else if (pData->cid == pSupInfo->colId[i]) { + pColData = taosArrayGet(pResBlock->pDataBlock, pSupInfo->slotId[i]); + + if (pData->flag == HAS_NONE || pData->flag == HAS_NULL || pData->flag == (HAS_NULL | HAS_NONE)) { + colDataSetNNULL(pColData, 0, dumpedRows); + } else { + if (IS_MATHABLE_TYPE(pColData->info.type)) { + copyNumericCols(pData, pDumpInfo, pColData, dumpedRows, asc); + } else { // varchar/nchar type + for (int32_t j = pDumpInfo->rowIndex; rowIndex < dumpedRows; j += step) { + tColDataGetValue(pData, j, &cv); + code = doCopyColVal(pColData, rowIndex++, i, &cv, pSupInfo); + if (code) { + return code; + } + } + } + } + + colIndex += 1; + i += 1; + } else { // the specified column does not exist in file block, fill with null data + pColData = taosArrayGet(pResBlock->pDataBlock, pSupInfo->slotId[i]); + colDataSetNNULL(pColData, 0, dumpedRows); + i += 1; + } + } + + // fill the mis-matched columns with null value + while (i < numOfOutputCols) { + pColData = taosArrayGet(pResBlock->pDataBlock, pSupInfo->slotId[i]); + colDataSetNNULL(pColData, 0, dumpedRows); + i += 1; + } + + pResBlock->info.dataLoad = 1; + pResBlock->info.rows = dumpedRows; + pDumpInfo->rowIndex += step * dumpedRows; + + // check if current block are all handled + if (pDumpInfo->rowIndex >= 0 && pDumpInfo->rowIndex < pRecord->numRow) { + int64_t ts = 
pBlockData->aTSKEY[pDumpInfo->rowIndex]; + if (outOfTimeWindow(ts, + &pReader->info.window)) { // the remain data has out of query time window, ignore current block + setBlockAllDumped(pDumpInfo, ts, pReader->info.order); + } + } else { + int64_t ts = asc ? pRecord->lastKey : pRecord->firstKey; + setBlockAllDumped(pDumpInfo, ts, pReader->info.order); + } + + double elapsedTime = (taosGetTimestampUs() - st) / 1000.0; + pReader->cost.blockLoadTime += elapsedTime; + + int32_t unDumpedRows = asc ? pRecord->numRow - pDumpInfo->rowIndex : pDumpInfo->rowIndex + 1; + tsdbDebug("%p copy file block to sdatablock, global index:%d, table index:%d, brange:%" PRId64 "-%" PRId64 + ", rows:%d, remain:%d, minVer:%" PRId64 ", maxVer:%" PRId64 ", uid:%" PRIu64 " elapsed time:%.2f ms, %s", + pReader, pBlockIter->index, pBlockInfo->tbBlockIdx, pRecord->firstKey, pRecord->lastKey, dumpedRows, + unDumpedRows, pRecord->minVer, pRecord->maxVer, pBlockInfo->uid, elapsedTime, pReader->idStr); + + return TSDB_CODE_SUCCESS; +} + +static FORCE_INLINE STSchema* getTableSchemaImpl(STsdbReader* pReader, uint64_t uid) { + ASSERT(pReader->info.pSchema == NULL); + + int32_t code = metaGetTbTSchemaEx(pReader->pTsdb->pVnode->pMeta, pReader->info.suid, uid, -1, &pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS || pReader->info.pSchema == NULL) { + terrno = code; + tsdbError("failed to get table schema, uid:%" PRIu64 ", it may have been dropped, ver:-1, %s", uid, pReader->idStr); + return NULL; + } + + code = tsdbRowMergerInit(&pReader->status.merger, pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS) { + terrno = code; + tsdbError("failed to init merger, code:%s, %s", tstrerror(code), pReader->idStr); + return NULL; + } + + return pReader->info.pSchema; +} + +static int32_t doLoadFileBlockData(STsdbReader* pReader, SDataBlockIter* pBlockIter, SBlockData* pBlockData, + uint64_t uid) { + int32_t code = 0; + STSchema* pSchema = pReader->info.pSchema; + int64_t st = taosGetTimestampUs(); + + 
tBlockDataReset(pBlockData); + + if (pReader->info.pSchema == NULL) { + pSchema = getTableSchemaImpl(pReader, uid); + if (pSchema == NULL) { + tsdbDebug("%p table uid:%" PRIu64 " has been dropped, no data existed, %s", pReader, uid, pReader->idStr); + return code; + } + } + + SBlockLoadSuppInfo* pSup = &pReader->suppInfo; + SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(pBlockIter); + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + + SBrinRecord* pRecord = &pBlockInfo->record; + code = tsdbDataFileReadBlockDataByColumn(pReader->pFileReader, pRecord, pBlockData, pSchema, &pSup->colId[1], + pSup->numOfCols - 1); + if (code != TSDB_CODE_SUCCESS) { + tsdbError("%p error occurs in loading file block, global index:%d, table index:%d, brange:%" PRId64 "-%" PRId64 + ", rows:%d, code:%s %s", + pReader, pBlockIter->index, pBlockInfo->tbBlockIdx, pBlockInfo->record.firstKey, + pBlockInfo->record.lastKey, pBlockInfo->record.numRow, tstrerror(code), pReader->idStr); + return code; + } + + double elapsedTime = (taosGetTimestampUs() - st) / 1000.0; + + tsdbDebug("%p load file block into buffer, global index:%d, index in table block list:%d, brange:%" PRId64 "-%" PRId64 + ", rows:%d, minVer:%" PRId64 ", maxVer:%" PRId64 ", elapsed time:%.2f ms, %s", + pReader, pBlockIter->index, pBlockInfo->tbBlockIdx, pRecord->firstKey, pRecord->lastKey, pRecord->numRow, + pRecord->minVer, pRecord->maxVer, elapsedTime, pReader->idStr); + + pReader->cost.blockLoadTime += elapsedTime; + pDumpInfo->allDumped = false; + + return TSDB_CODE_SUCCESS; +} + +/** + * This is an two rectangles overlap cases. 
+ */ +static int32_t dataBlockPartiallyRequired(STimeWindow* pWindow, SVersionRange* pVerRange, SFileDataBlockInfo* pBlock) { + return (pWindow->ekey < pBlock->record.lastKey && pWindow->ekey >= pBlock->record.firstKey) || + (pWindow->skey > pBlock->record.firstKey && pWindow->skey <= pBlock->record.lastKey) || + (pVerRange->minVer > pBlock->record.minVer && pVerRange->minVer <= pBlock->record.maxVer) || + (pVerRange->maxVer < pBlock->record.maxVer && pVerRange->maxVer >= pBlock->record.minVer); +} + +static bool getNeighborBlockOfSameTable(SFileDataBlockInfo* pBlockInfo, STableBlockScanInfo* pTableBlockScanInfo, + int32_t* nextIndex, int32_t order, SBrinRecord* pRecord) { + bool asc = ASCENDING_TRAVERSE(order); + if (asc && pBlockInfo->tbBlockIdx >= taosArrayGetSize(pTableBlockScanInfo->pBlockList) - 1) { + return false; + } + + if (!asc && pBlockInfo->tbBlockIdx == 0) { + return false; + } + + int32_t step = asc ? 1 : -1; + // *nextIndex = pBlockInfo->tbBlockIdx + step; + // *pBlockIndex = *(SBlockIndex*)taosArrayGet(pTableBlockScanInfo->pBlockList, *nextIndex); + SBrinRecord* p = taosArrayGet(pTableBlockScanInfo->pBlockList, pBlockInfo->tbBlockIdx + step); + memcpy(pRecord, p, sizeof(SBrinRecord)); + + // tMapDataGetItemByIdx(&pTableBlockScanInfo->mapData, pIndex->ordinalIndex, pBlock, tGetDataBlk); + return true; +} + +static int32_t findFileBlockInfoIndex(SDataBlockIter* pBlockIter, SFileDataBlockInfo* pFBlockInfo) { + int32_t step = ASCENDING_TRAVERSE(pBlockIter->order) ? 
1 : -1; + int32_t index = pBlockIter->index; + + while (index < pBlockIter->numOfBlocks && index >= 0) { + SFileDataBlockInfo* pFBlock = taosArrayGet(pBlockIter->blockList, index); + if (pFBlock->uid == pFBlockInfo->uid && pFBlock->tbBlockIdx == pFBlockInfo->tbBlockIdx) { + return index; + } + + index += step; + } + + return -1; +} + +static int32_t setFileBlockActiveInBlockIter(SDataBlockIter* pBlockIter, int32_t index, int32_t step) { + if (index < 0 || index >= pBlockIter->numOfBlocks) { + return -1; + } + + SFileDataBlockInfo fblock = *(SFileDataBlockInfo*)taosArrayGet(pBlockIter->blockList, index); + pBlockIter->index += step; + + if (index != pBlockIter->index) { + taosArrayRemove(pBlockIter->blockList, index); + taosArrayInsert(pBlockIter->blockList, pBlockIter->index, &fblock); + + SFileDataBlockInfo* pBlockInfo = taosArrayGet(pBlockIter->blockList, pBlockIter->index); + ASSERT(pBlockInfo->uid == fblock.uid && pBlockInfo->tbBlockIdx == fblock.tbBlockIdx); + } + + return TSDB_CODE_SUCCESS; +} + +// todo: this attribute could be acquired during extractin the global ordered block list. +static bool overlapWithNeighborBlock2(SFileDataBlockInfo* pBlock, SBrinRecord* pRec, int32_t order) { + // it is the last block in current file, no chance to overlap with neighbor blocks. 
+ if (ASCENDING_TRAVERSE(order)) { + return pBlock->record.lastKey == pRec->firstKey; + } else { + return pBlock->record.firstKey == pRec->lastKey; + } +} + +static bool bufferDataInFileBlockGap(int32_t order, TSDBKEY key, SFileDataBlockInfo* pBlock) { + bool ascScan = ASCENDING_TRAVERSE(order); + + return (ascScan && (key.ts != TSKEY_INITIAL_VAL && key.ts <= pBlock->record.firstKey)) || + (!ascScan && (key.ts != TSKEY_INITIAL_VAL && key.ts >= pBlock->record.lastKey)); +} + +static bool keyOverlapFileBlock(TSDBKEY key, SFileDataBlockInfo* pBlock, SVersionRange* pVerRange) { + return (key.ts >= pBlock->record.firstKey && key.ts <= pBlock->record.lastKey) && + (pBlock->record.maxVer >= pVerRange->minVer) && (pBlock->record.minVer <= pVerRange->maxVer); +} + +static bool doCheckforDatablockOverlap(STableBlockScanInfo* pBlockScanInfo, const SBrinRecord* pRecord, + int32_t startIndex) { + size_t num = taosArrayGetSize(pBlockScanInfo->delSkyline); + + for (int32_t i = startIndex; i < num; i += 1) { + TSDBKEY* p = taosArrayGet(pBlockScanInfo->delSkyline, i); + if (p->ts >= pRecord->firstKey && p->ts <= pRecord->lastKey) { + if (p->version >= pRecord->minVer) { + return true; + } + } else if (p->ts < pRecord->firstKey) { // p->ts < pBlock->minKey.ts + if (p->version >= pRecord->minVer) { + if (i < num - 1) { + TSDBKEY* pnext = taosArrayGet(pBlockScanInfo->delSkyline, i + 1); + if (pnext->ts >= pRecord->firstKey) { + return true; + } + } else { // it must be the last point + ASSERT(p->version == 0); + } + } + } else { // (p->ts > pBlock->maxKey.ts) { + return false; + } + } + + return false; +} + +static bool overlapWithDelSkyline(STableBlockScanInfo* pBlockScanInfo, const SBrinRecord* pRecord, int32_t order) { + if (pBlockScanInfo->delSkyline == NULL || (taosArrayGetSize(pBlockScanInfo->delSkyline) == 0)) { + return false; + } + + // ts is not overlap + TSDBKEY* pFirst = taosArrayGet(pBlockScanInfo->delSkyline, 0); + TSDBKEY* pLast = 
taosArrayGetLast(pBlockScanInfo->delSkyline); + if (pRecord->firstKey > pLast->ts || pRecord->lastKey < pFirst->ts) { + return false; + } + + // version is not overlap + if (ASCENDING_TRAVERSE(order)) { + return doCheckforDatablockOverlap(pBlockScanInfo, pRecord, pBlockScanInfo->fileDelIndex); + } else { + int32_t index = pBlockScanInfo->fileDelIndex; + while (1) { + TSDBKEY* p = taosArrayGet(pBlockScanInfo->delSkyline, index); + if (p->ts > pRecord->firstKey && index > 0) { + index -= 1; + } else { // find the first point that is smaller than the minKey.ts of dataBlock. + if (p->ts == pRecord->firstKey && p->version < pRecord->maxVer && index > 0) { + index -= 1; + } + break; + } + } + + return doCheckforDatablockOverlap(pBlockScanInfo, pRecord, index); + } +} + +typedef struct { + bool overlapWithNeighborBlock; + bool hasDupTs; + bool overlapWithDelInfo; + bool overlapWithLastBlock; + bool overlapWithKeyInBuf; + bool partiallyRequired; + bool moreThanCapcity; +} SDataBlockToLoadInfo; + +static void getBlockToLoadInfo(SDataBlockToLoadInfo* pInfo, SFileDataBlockInfo* pBlockInfo, + STableBlockScanInfo* pScanInfo, TSDBKEY keyInBuf, SLastBlockReader* pLastBlockReader, + STsdbReader* pReader) { + int32_t neighborIndex = 0; + SBrinRecord rec = {0}; + + bool hasNeighbor = getNeighborBlockOfSameTable(pBlockInfo, pScanInfo, &neighborIndex, pReader->info.order, &rec); + + // overlap with neighbor + if (hasNeighbor) { + pInfo->overlapWithNeighborBlock = overlapWithNeighborBlock2(pBlockInfo, &rec, pReader->info.order); + } + + // has duplicated ts of different version in this block + pInfo->hasDupTs = (pBlockInfo->record.numRow > pBlockInfo->record.count); + pInfo->overlapWithDelInfo = overlapWithDelSkyline(pScanInfo, &pBlockInfo->record, pReader->info.order); + + if (hasDataInLastBlock(pLastBlockReader)) { + int64_t tsLast = getCurrentKeyInLastBlock(pLastBlockReader); + pInfo->overlapWithLastBlock = !(pBlockInfo->record.lastKey < tsLast || pBlockInfo->record.firstKey > 
tsLast); + } + + pInfo->moreThanCapcity = pBlockInfo->record.numRow > pReader->resBlockInfo.capacity; + pInfo->partiallyRequired = dataBlockPartiallyRequired(&pReader->info.window, &pReader->info.verRange, pBlockInfo); + pInfo->overlapWithKeyInBuf = keyOverlapFileBlock(keyInBuf, pBlockInfo, &pReader->info.verRange); +} + +// 1. the version of all rows should be less than the endVersion +// 2. current block should not overlap with next neighbor block +// 3. current timestamp should not be overlap with each other +// 4. output buffer should be large enough to hold all rows in current block +// 5. delete info should not overlap with current block data +// 6. current block should not contain the duplicated ts +static bool fileBlockShouldLoad(STsdbReader* pReader, SFileDataBlockInfo* pBlockInfo, STableBlockScanInfo* pScanInfo, + TSDBKEY keyInBuf, SLastBlockReader* pLastBlockReader) { + SDataBlockToLoadInfo info = {0}; + getBlockToLoadInfo(&info, pBlockInfo, pScanInfo, keyInBuf, pLastBlockReader, pReader); + + bool loadDataBlock = + (info.overlapWithNeighborBlock || info.hasDupTs || info.partiallyRequired || info.overlapWithKeyInBuf || + info.moreThanCapcity || info.overlapWithDelInfo || info.overlapWithLastBlock); + + // log the reason why load the datablock for profile + if (loadDataBlock) { + tsdbDebug("%p uid:%" PRIu64 + " need to load the datablock, overlapneighbor:%d, hasDup:%d, partiallyRequired:%d, " + "overlapWithKey:%d, greaterThanBuf:%d, overlapWithDel:%d, overlapWithlastBlock:%d, %s", + pReader, pBlockInfo->uid, info.overlapWithNeighborBlock, info.hasDupTs, info.partiallyRequired, + info.overlapWithKeyInBuf, info.moreThanCapcity, info.overlapWithDelInfo, info.overlapWithLastBlock, + pReader->idStr); + } + + return loadDataBlock; +} + +static bool isCleanFileDataBlock(STsdbReader* pReader, SFileDataBlockInfo* pBlockInfo, STableBlockScanInfo* pScanInfo, + TSDBKEY keyInBuf, SLastBlockReader* pLastBlockReader) { + SDataBlockToLoadInfo info = {0}; + 
getBlockToLoadInfo(&info, pBlockInfo, pScanInfo, keyInBuf, pLastBlockReader, pReader); + bool isCleanFileBlock = !(info.overlapWithNeighborBlock || info.hasDupTs || info.overlapWithKeyInBuf || + info.overlapWithDelInfo || info.overlapWithLastBlock); + return isCleanFileBlock; +} + +static int32_t buildDataBlockFromBuf(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, int64_t endKey) { + if (!(pBlockScanInfo->iiter.hasVal || pBlockScanInfo->iter.hasVal)) { + return TSDB_CODE_SUCCESS; + } + + SSDataBlock* pBlock = pReader->resBlockInfo.pResBlock; + + int64_t st = taosGetTimestampUs(); + int32_t code = buildDataBlockFromBufImpl(pBlockScanInfo, endKey, pReader->resBlockInfo.capacity, pReader); + + blockDataUpdateTsWindow(pBlock, pReader->suppInfo.slotId[0]); + pBlock->info.id.uid = pBlockScanInfo->uid; + + setComposedBlockFlag(pReader, true); + + double elapsedTime = (taosGetTimestampUs() - st) / 1000.0; + tsdbDebug("%p build data block from cache completed, elapsed time:%.2f ms, numOfRows:%" PRId64 ", brange:%" PRId64 + " - %" PRId64 ", uid:%" PRIu64 ", %s", + pReader, elapsedTime, pBlock->info.rows, pBlock->info.window.skey, pBlock->info.window.ekey, + pBlockScanInfo->uid, pReader->idStr); + + pReader->cost.buildmemBlock += elapsedTime; + return code; +} + +static bool tryCopyDistinctRowFromFileBlock(STsdbReader* pReader, SBlockData* pBlockData, int64_t key, + SFileBlockDumpInfo* pDumpInfo, bool* copied) { + // opt version + // 1. it is not a border point + // 2. the direct next point is not an duplicated timestamp + int32_t code = TSDB_CODE_SUCCESS; + + *copied = false; + bool asc = (pReader->info.order == TSDB_ORDER_ASC); + if ((pDumpInfo->rowIndex < pDumpInfo->totalRows - 1 && asc) || (pDumpInfo->rowIndex > 0 && (!asc))) { + int32_t step = pReader->info.order == TSDB_ORDER_ASC ? 
1 : -1; + + int64_t nextKey = pBlockData->aTSKEY[pDumpInfo->rowIndex + step]; + if (nextKey != key) { // merge is not needed + code = doAppendRowFromFileBlock(pReader->resBlockInfo.pResBlock, pReader, pBlockData, pDumpInfo->rowIndex); + if (code) { + return code; + } + pDumpInfo->rowIndex += step; + *copied = true; + } + } + + return code; +} + +static bool nextRowFromLastBlocks(SLastBlockReader* pLastBlockReader, STableBlockScanInfo* pScanInfo, + SVersionRange* pVerRange) { + int32_t step = ASCENDING_TRAVERSE(pLastBlockReader->order) ? 1 : -1; + + while (1) { + bool hasVal = tMergeTreeNext(&pLastBlockReader->mergeTree); + if (!hasVal) { // the next value will be the accessed key in stt + pScanInfo->lastKeyInStt += step; + return false; + } + + TSDBROW* pRow = tMergeTreeGetRow(&pLastBlockReader->mergeTree); + int64_t key = pRow->pBlockData->aTSKEY[pRow->iRow]; + int64_t ver = pRow->pBlockData->aVersion[pRow->iRow]; + + pLastBlockReader->currentKey = key; + pScanInfo->lastKeyInStt = key; + + if (!hasBeenDropped(pScanInfo->delSkyline, &pScanInfo->lastBlockDelIndex, key, ver, pLastBlockReader->order, + pVerRange)) { + return true; + } + } +} + +static bool tryCopyDistinctRowFromSttBlock(TSDBROW* fRow, SLastBlockReader* pLastBlockReader, + STableBlockScanInfo* pScanInfo, int64_t ts, STsdbReader* pReader, + bool* copied) { + int32_t code = TSDB_CODE_SUCCESS; + + *copied = false; + + bool hasVal = nextRowFromLastBlocks(pLastBlockReader, pScanInfo, &pReader->info.verRange); + if (hasVal) { + int64_t next1 = getCurrentKeyInLastBlock(pLastBlockReader); + if (next1 != ts) { + code = doAppendRowFromFileBlock(pReader->resBlockInfo.pResBlock, pReader, fRow->pBlockData, fRow->iRow); + if (code) { + return code; + } + + *copied = true; + return code; + } + } else { + code = doAppendRowFromFileBlock(pReader->resBlockInfo.pResBlock, pReader, fRow->pBlockData, fRow->iRow); + if (code) { + return code; + } + + *copied = true; + return code; + } + + return code; +} + +static 
FORCE_INLINE STSchema* doGetSchemaForTSRow(int32_t sversion, STsdbReader* pReader, uint64_t uid) { + // always set the newest schema version in pReader->info.pSchema + if (pReader->info.pSchema == NULL) { + STSchema* ps = getTableSchemaImpl(pReader, uid); + if (ps == NULL) { + return NULL; + } + } + + if (pReader->info.pSchema && sversion == pReader->info.pSchema->version) { + return pReader->info.pSchema; + } + + void** p = tSimpleHashGet(pReader->pSchemaMap, &sversion, sizeof(sversion)); + if (p != NULL) { + return *(STSchema**)p; + } + + STSchema* ptr = NULL; + int32_t code = metaGetTbTSchemaEx(pReader->pTsdb->pVnode->pMeta, pReader->info.suid, uid, sversion, &ptr); + if (code != TSDB_CODE_SUCCESS) { + terrno = code; + return NULL; + } else { + code = tSimpleHashPut(pReader->pSchemaMap, &sversion, sizeof(sversion), &ptr, POINTER_BYTES); + if (code != TSDB_CODE_SUCCESS) { + terrno = code; + return NULL; + } + return ptr; + } +} + +static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, TSDBROW* pRow, + SIterInfo* pIter, int64_t key, SLastBlockReader* pLastBlockReader) { + SRowMerger* pMerger = &pReader->status.merger; + SRow* pTSRow = NULL; + SBlockData* pBlockData = &pReader->status.fileBlockData; + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + + int64_t tsLast = INT64_MIN; + if (hasDataInLastBlock(pLastBlockReader)) { + tsLast = getCurrentKeyInLastBlock(pLastBlockReader); + } + + TSDBKEY k = TSDBROW_KEY(pRow); + TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); + + // merge is not initialized yet, due to the fact that the pReader->info.pSchema is not initialized + if (pMerger->pArray == NULL) { + ASSERT(pReader->info.pSchema == NULL); + STSchema* ps = getTableSchemaImpl(pReader, pBlockScanInfo->uid); + if (ps == NULL) { + return terrno; + } + } + + int64_t minKey = 0; + if (pReader->info.order == TSDB_ORDER_ASC) { + minKey = INT64_MAX; // chosen the minimum value + if (minKey > 
tsLast && hasDataInLastBlock(pLastBlockReader)) { + minKey = tsLast; + } + + if (minKey > k.ts) { + minKey = k.ts; + } + + if (minKey > key && hasDataInFileBlock(pBlockData, pDumpInfo)) { + minKey = key; + } + } else { + minKey = INT64_MIN; + if (minKey < tsLast && hasDataInLastBlock(pLastBlockReader)) { + minKey = tsLast; + } + + if (minKey < k.ts) { + minKey = k.ts; + } + + if (minKey < key && hasDataInFileBlock(pBlockData, pDumpInfo)) { + minKey = key; + } + } + + // todo remove init + bool init = false; + + // ASC: file block ---> last block -----> imem -----> mem + // DESC: mem -----> imem -----> last block -----> file block + if (pReader->info.order == TSDB_ORDER_ASC) { + if (minKey == key) { + init = true; + int32_t code = tsdbRowMergerAdd(pMerger, &fRow, pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); + } + + if (minKey == tsLast) { + TSDBROW* fRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); + if (init) { + tsdbRowMergerAdd(pMerger, fRow1, NULL); + } else { + init = true; + int32_t code = tsdbRowMergerAdd(pMerger, fRow1, pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLast, pMerger, &pReader->info.verRange, + pReader->idStr); + } + + if (minKey == k.ts) { + STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); + if (pSchema == NULL) { + return terrno; + } + if (init) { + tsdbRowMergerAdd(pMerger, pRow, pSchema); + } else { + init = true; + int32_t code = tsdbRowMergerAdd(pMerger, pRow, pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + int32_t code = doMergeRowsInBuf(pIter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + } else { + if (minKey == k.ts) { + init = true; + STSchema* pSchema = 
doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); + if (pSchema == NULL) { + return terrno; + } + + int32_t code = tsdbRowMergerAdd(pMerger, pRow, pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + code = doMergeRowsInBuf(pIter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); + if (code != TSDB_CODE_SUCCESS || pMerger->pTSchema == NULL) { + return code; + } + } + + if (minKey == tsLast) { + TSDBROW* fRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); + if (init) { + tsdbRowMergerAdd(pMerger, fRow1, NULL); + } else { + init = true; + int32_t code = tsdbRowMergerAdd(pMerger, fRow1, pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLast, pMerger, &pReader->info.verRange, + pReader->idStr); + } + + if (minKey == key) { + if (init) { + tsdbRowMergerAdd(pMerger, &fRow, NULL); + } else { + init = true; + int32_t code = tsdbRowMergerAdd(pMerger, &fRow, pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); + } + } + + int32_t code = tsdbRowMergerGetRow(pMerger, &pTSRow); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + code = doAppendRowFromTSRow(pReader->resBlockInfo.pResBlock, pReader, pTSRow, pBlockScanInfo); + + taosMemoryFree(pTSRow); + tsdbRowMergerClear(pMerger); + + return code; +} + +static int32_t doMergeFileBlockAndLastBlock(SLastBlockReader* pLastBlockReader, STsdbReader* pReader, + STableBlockScanInfo* pBlockScanInfo, SBlockData* pBlockData, + bool mergeBlockData) { + SRowMerger* pMerger = &pReader->status.merger; + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + + int64_t tsLastBlock = getCurrentKeyInLastBlock(pLastBlockReader); + bool copied = false; + int32_t code = TSDB_CODE_SUCCESS; + SRow* pTSRow = NULL; + TSDBROW* pRow = tMergeTreeGetRow(&pLastBlockReader->mergeTree); + + // create 
local variable to hold the row value + TSDBROW fRow = {.iRow = pRow->iRow, .type = TSDBROW_COL_FMT, .pBlockData = pRow->pBlockData}; + + tsdbTrace("fRow ptr:%p, %d, uid:%" PRIu64 ", %s", pRow->pBlockData, pRow->iRow, pLastBlockReader->uid, + pReader->idStr); + + // only last block exists + if ((!mergeBlockData) || (tsLastBlock != pBlockData->aTSKEY[pDumpInfo->rowIndex])) { + code = tryCopyDistinctRowFromSttBlock(&fRow, pLastBlockReader, pBlockScanInfo, tsLastBlock, pReader, &copied); + if (code) { + return code; + } + + if (copied) { + pBlockScanInfo->lastKey = tsLastBlock; + return TSDB_CODE_SUCCESS; + } else { + code = tsdbRowMergerAdd(pMerger, &fRow, pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + TSDBROW* pRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); + tsdbRowMergerAdd(pMerger, pRow1, NULL); + doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLastBlock, pMerger, &pReader->info.verRange, + pReader->idStr); + + code = tsdbRowMergerGetRow(pMerger, &pTSRow); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + code = doAppendRowFromTSRow(pReader->resBlockInfo.pResBlock, pReader, pTSRow, pBlockScanInfo); + + taosMemoryFree(pTSRow); + tsdbRowMergerClear(pMerger); + + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + } else { // not merge block data + code = tsdbRowMergerAdd(pMerger, &fRow, pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLastBlock, pMerger, &pReader->info.verRange, + pReader->idStr); + + // merge with block data if ts == key + if (tsLastBlock == pBlockData->aTSKEY[pDumpInfo->rowIndex]) { + doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); + } + + code = tsdbRowMergerGetRow(pMerger, &pTSRow); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + code = doAppendRowFromTSRow(pReader->resBlockInfo.pResBlock, pReader, pTSRow, pBlockScanInfo); + + taosMemoryFree(pTSRow); + 
tsdbRowMergerClear(pMerger); + + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + + return TSDB_CODE_SUCCESS; +} + +static int32_t mergeFileBlockAndLastBlock(STsdbReader* pReader, SLastBlockReader* pLastBlockReader, int64_t key, + STableBlockScanInfo* pBlockScanInfo, SBlockData* pBlockData) { + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + SRowMerger* pMerger = &pReader->status.merger; + + // merge is not initialized yet, due to the fact that the pReader->info.pSchema is not initialized + if (pMerger->pArray == NULL) { + ASSERT(pReader->info.pSchema == NULL); + STSchema* ps = getTableSchemaImpl(pReader, pBlockScanInfo->uid); + if (ps == NULL) { + return terrno; + } + } + + if (hasDataInFileBlock(pBlockData, pDumpInfo)) { + // no last block available, only data block exists + if (!hasDataInLastBlock(pLastBlockReader)) { + return mergeRowsInFileBlocks(pBlockData, pBlockScanInfo, key, pReader); + } + + // row in last file block + TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); + int64_t ts = getCurrentKeyInLastBlock(pLastBlockReader); + + if (ASCENDING_TRAVERSE(pReader->info.order)) { + if (key < ts) { // imem, mem are all empty, file blocks (data blocks and last block) exist + return mergeRowsInFileBlocks(pBlockData, pBlockScanInfo, key, pReader); + } else if (key == ts) { + SRow* pTSRow = NULL; + int32_t code = tsdbRowMergerAdd(pMerger, &fRow, pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); + + TSDBROW* pRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); + tsdbRowMergerAdd(pMerger, pRow1, NULL); + + doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, ts, pMerger, &pReader->info.verRange, pReader->idStr); + + code = tsdbRowMergerGetRow(pMerger, &pTSRow); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + code = doAppendRowFromTSRow(pReader->resBlockInfo.pResBlock, pReader, pTSRow, pBlockScanInfo); + + 
taosMemoryFree(pTSRow); + tsdbRowMergerClear(pMerger); + return code; + } else { // key > ts + return doMergeFileBlockAndLastBlock(pLastBlockReader, pReader, pBlockScanInfo, NULL, false); + } + } else { // desc order + return doMergeFileBlockAndLastBlock(pLastBlockReader, pReader, pBlockScanInfo, pBlockData, true); + } + } else { // only last block exists + return doMergeFileBlockAndLastBlock(pLastBlockReader, pReader, pBlockScanInfo, NULL, false); + } +} + +static int32_t doMergeMultiLevelRows(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, SBlockData* pBlockData, + SLastBlockReader* pLastBlockReader) { + SRowMerger* pMerger = &pReader->status.merger; + SRow* pTSRow = NULL; + int32_t code = TSDB_CODE_SUCCESS; + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + SArray* pDelList = pBlockScanInfo->delSkyline; + + TSDBROW* pRow = getValidMemRow(&pBlockScanInfo->iter, pDelList, pReader); + TSDBROW* piRow = getValidMemRow(&pBlockScanInfo->iiter, pDelList, pReader); + + int64_t tsLast = INT64_MIN; + if (hasDataInLastBlock(pLastBlockReader)) { + tsLast = getCurrentKeyInLastBlock(pLastBlockReader); + } + + int64_t key = hasDataInFileBlock(pBlockData, pDumpInfo) ? 
pBlockData->aTSKEY[pDumpInfo->rowIndex] : INT64_MIN; + + TSDBKEY k = TSDBROW_KEY(pRow); + TSDBKEY ik = TSDBROW_KEY(piRow); + STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); + if (pSchema == NULL) { + return code; + } + + STSchema* piSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(piRow), pReader, pBlockScanInfo->uid); + if (piSchema == NULL) { + return code; + } + + // merge is not initialized yet, due to the fact that the pReader->info.pSchema is not initialized + if (pMerger->pArray == NULL) { + ASSERT(pReader->info.pSchema == NULL); + STSchema* ps = getTableSchemaImpl(pReader, pBlockScanInfo->uid); + if (ps == NULL) { + return terrno; + } + } + + int64_t minKey = 0; + if (ASCENDING_TRAVERSE(pReader->info.order)) { + minKey = INT64_MAX; // let's find the minimum + if (minKey > k.ts) { + minKey = k.ts; + } + + if (minKey > ik.ts) { + minKey = ik.ts; + } + + if (minKey > key && hasDataInFileBlock(pBlockData, pDumpInfo)) { + minKey = key; + } + + if (minKey > tsLast && hasDataInLastBlock(pLastBlockReader)) { + minKey = tsLast; + } + } else { + minKey = INT64_MIN; // let find the maximum ts value + if (minKey < k.ts) { + minKey = k.ts; + } + + if (minKey < ik.ts) { + minKey = ik.ts; + } + + if (minKey < key && hasDataInFileBlock(pBlockData, pDumpInfo)) { + minKey = key; + } + + if (minKey < tsLast && hasDataInLastBlock(pLastBlockReader)) { + minKey = tsLast; + } + } + + bool init = false; + + // ASC: file block -----> last block -----> imem -----> mem + // DESC: mem -----> imem -----> last block -----> file block + if (ASCENDING_TRAVERSE(pReader->info.order)) { + if (minKey == key) { + init = true; + TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); + code = tsdbRowMergerAdd(pMerger, &fRow, pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); + } + + if (minKey == tsLast) { + TSDBROW* pRow1 = 
tMergeTreeGetRow(&pLastBlockReader->mergeTree); + if (init) { + tsdbRowMergerAdd(pMerger, pRow1, NULL); + } else { + init = true; + code = tsdbRowMergerAdd(pMerger, pRow1, pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + + doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLast, pMerger, &pReader->info.verRange, + pReader->idStr); + } + + if (minKey == ik.ts) { + if (init) { + tsdbRowMergerAdd(pMerger, piRow, piSchema); + } else { + init = true; + code = tsdbRowMergerAdd(pMerger, piRow, piSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + + code = doMergeRowsInBuf(&pBlockScanInfo->iiter, pBlockScanInfo->uid, ik.ts, pBlockScanInfo->delSkyline, pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + + if (minKey == k.ts) { + if (init) { + tsdbRowMergerAdd(pMerger, pRow, pSchema); + } else { + // STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); + code = tsdbRowMergerAdd(pMerger, pRow, pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + code = doMergeRowsInBuf(&pBlockScanInfo->iter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + } else { + if (minKey == k.ts) { + init = true; + code = tsdbRowMergerAdd(pMerger, pRow, pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + code = doMergeRowsInBuf(&pBlockScanInfo->iter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + + if (minKey == ik.ts) { + if (init) { + tsdbRowMergerAdd(pMerger, piRow, piSchema); + } else { + init = true; + code = tsdbRowMergerAdd(pMerger, piRow, piSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + code = doMergeRowsInBuf(&pBlockScanInfo->iiter, pBlockScanInfo->uid, ik.ts, pBlockScanInfo->delSkyline, pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + + if (minKey 
== tsLast) { + TSDBROW* pRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); + if (init) { + tsdbRowMergerAdd(pMerger, pRow1, NULL); + } else { + init = true; + code = tsdbRowMergerAdd(pMerger, pRow1, pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLast, pMerger, &pReader->info.verRange, + pReader->idStr); + } + + if (minKey == key) { + TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); + if (!init) { + code = tsdbRowMergerAdd(pMerger, &fRow, pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } else { + tsdbRowMergerAdd(pMerger, &fRow, NULL); + } + doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); + } + } + + code = tsdbRowMergerGetRow(pMerger, &pTSRow); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + code = doAppendRowFromTSRow(pReader->resBlockInfo.pResBlock, pReader, pTSRow, pBlockScanInfo); + + taosMemoryFree(pTSRow); + tsdbRowMergerClear(pMerger); + return code; +} + +int32_t doInitMemDataIter(STsdbReader* pReader, STbData** pData, STableBlockScanInfo* pBlockScanInfo, TSDBKEY* pKey, + SMemTable* pMem, SIterInfo* pIter, const char* type) { + int32_t code = TSDB_CODE_SUCCESS; + int32_t backward = (!ASCENDING_TRAVERSE(pReader->info.order)); + pIter->hasVal = false; + + if (pMem != NULL) { + *pData = tsdbGetTbDataFromMemTable(pMem, pReader->info.suid, pBlockScanInfo->uid); + + if ((*pData) != NULL) { + code = tsdbTbDataIterCreate((*pData), pKey, backward, &pIter->iter); + if (code == TSDB_CODE_SUCCESS) { + pIter->hasVal = (tsdbTbDataIterGet(pIter->iter) != NULL); + + tsdbDebug("%p uid:%" PRIu64 ", check data in %s from skey:%" PRId64 ", order:%d, ts range in buf:%" PRId64 + "-%" PRId64 " %s", + pReader, pBlockScanInfo->uid, type, pKey->ts, pReader->info.order, (*pData)->minKey, (*pData)->maxKey, + pReader->idStr); + } else { + tsdbError("%p uid:%" PRIu64 ", failed to create iterator for %s, code:%s, 
%s", pReader, pBlockScanInfo->uid, + type, tstrerror(code), pReader->idStr); + return code; + } + } + } else { + tsdbDebug("%p uid:%" PRIu64 ", no data in %s, %s", pReader, pBlockScanInfo->uid, type, pReader->idStr); + } + + return code; +} + +static int32_t initMemDataIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader) { + if (pBlockScanInfo->iterInit) { + return TSDB_CODE_SUCCESS; + } + + STbData* d = NULL; + TSDBKEY startKey = {0}; + if (ASCENDING_TRAVERSE(pReader->info.order)) { + startKey = (TSDBKEY){.ts = pBlockScanInfo->lastKey + 1, .version = pReader->info.verRange.minVer}; + } else { + startKey = (TSDBKEY){.ts = pBlockScanInfo->lastKey - 1, .version = pReader->info.verRange.maxVer}; + } + + int32_t code = + doInitMemDataIter(pReader, &d, pBlockScanInfo, &startKey, pReader->pReadSnap->pMem, &pBlockScanInfo->iter, "mem"); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + STbData* di = NULL; + code = doInitMemDataIter(pReader, &di, pBlockScanInfo, &startKey, pReader->pReadSnap->pIMem, &pBlockScanInfo->iiter, + "imem"); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + loadMemTombData(&pBlockScanInfo->pMemDelData, d, di, pReader->info.verRange.maxVer); + + pBlockScanInfo->iterInit = true; + return TSDB_CODE_SUCCESS; +} + +static bool isValidFileBlockRow(SBlockData* pBlockData, SFileBlockDumpInfo* pDumpInfo, + STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader) { + // it is an multi-table data block + if (pBlockData->aUid != NULL) { + uint64_t uid = pBlockData->aUid[pDumpInfo->rowIndex]; + if (uid != pBlockScanInfo->uid) { // move to next row + return false; + } + } + + // check for version and time range + int64_t ver = pBlockData->aVersion[pDumpInfo->rowIndex]; + if (ver > pReader->info.verRange.maxVer || ver < pReader->info.verRange.minVer) { + return false; + } + + int64_t ts = pBlockData->aTSKEY[pDumpInfo->rowIndex]; + if (ts > pReader->info.window.ekey || ts < pReader->info.window.skey) { + return false; + } + + if 
(hasBeenDropped(pBlockScanInfo->delSkyline, &pBlockScanInfo->fileDelIndex, ts, ver, pReader->info.order, + &pReader->info.verRange)) { + return false; + } + + return true; +} + +static bool initLastBlockReader(SLastBlockReader* pLBlockReader, STableBlockScanInfo* pScanInfo, STsdbReader* pReader) { + // the last block reader has been initialized for this table. + if (pLBlockReader->uid == pScanInfo->uid) { + return hasDataInLastBlock(pLBlockReader); + } + + if (pLBlockReader->uid != 0) { + tMergeTreeClose(&pLBlockReader->mergeTree); + } + + pLBlockReader->uid = pScanInfo->uid; + + STimeWindow w = pLBlockReader->window; + if (ASCENDING_TRAVERSE(pLBlockReader->order)) { + w.skey = pScanInfo->lastKeyInStt; + } else { + w.ekey = pScanInfo->lastKeyInStt; + } + + int64_t st = taosGetTimestampUs(); + tsdbDebug("init last block reader, window:%" PRId64 "-%" PRId64 ", uid:%" PRIu64 ", %s", w.skey, w.ekey, + pScanInfo->uid, pReader->idStr); + + SMergeTreeConf conf = { + .uid = pScanInfo->uid, + .suid = pReader->info.suid, + .pTsdb = pReader->pTsdb, + .timewindow = w, + .verRange = pLBlockReader->verRange, + .strictTimeRange = false, + .pSchema = pReader->info.pSchema, + .pCurrentFileset = pReader->status.pCurrentFileset, + .backward = (pLBlockReader->order == TSDB_ORDER_DESC), + .pSttFileBlockIterArray = pReader->status.pLDataIterArray, + .pCols = pReader->suppInfo.colId, + .numOfCols = pReader->suppInfo.numOfCols, + .loadTombFn = loadSttTombDataForAll, + .pReader = pReader, + .idstr = pReader->idStr, + }; + + int32_t code = tMergeTreeOpen2(&pLBlockReader->mergeTree, &conf); + if (code != TSDB_CODE_SUCCESS) { + return false; + } + + initMemDataIterator(pScanInfo, pReader); + initDelSkylineIterator(pScanInfo, pReader->info.order, &pReader->cost); + + code = nextRowFromLastBlocks(pLBlockReader, pScanInfo, &pReader->info.verRange); + + int64_t el = taosGetTimestampUs() - st; + pReader->cost.initLastBlockReader += (el / 1000.0); + + tsdbDebug("init last block reader completed, 
elapsed time:%" PRId64 "us %s", el, pReader->idStr); + return code; +} + +static bool hasDataInLastBlock(SLastBlockReader* pLastBlockReader) { return pLastBlockReader->mergeTree.pIter != NULL; } + +bool hasDataInFileBlock(const SBlockData* pBlockData, const SFileBlockDumpInfo* pDumpInfo) { + if ((pBlockData->nRow > 0) && (pBlockData->nRow != pDumpInfo->totalRows)) { + return false; // this is an invalid result. + } + return pBlockData->nRow > 0 && (!pDumpInfo->allDumped); +} + +int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pBlockScanInfo, int64_t key, + STsdbReader* pReader) { + SRowMerger* pMerger = &pReader->status.merger; + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + bool copied = false; + + int32_t code = tryCopyDistinctRowFromFileBlock(pReader, pBlockData, key, pDumpInfo, &copied); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + // merge is not initialized yet, due to the fact that the pReader->info.pSchema is not initialized + if (pMerger->pArray == NULL) { + ASSERT(pReader->info.pSchema == NULL); + STSchema* ps = getTableSchemaImpl(pReader, pBlockScanInfo->uid); + if (ps == NULL) { + return terrno; + } + } + + if (copied) { + pBlockScanInfo->lastKey = key; + return TSDB_CODE_SUCCESS; + } else { + TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); + + SRow* pTSRow = NULL; + code = tsdbRowMergerAdd(pMerger, &fRow, pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader); + code = tsdbRowMergerGetRow(pMerger, &pTSRow); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + code = doAppendRowFromTSRow(pReader->resBlockInfo.pResBlock, pReader, pTSRow, pBlockScanInfo); + + taosMemoryFree(pTSRow); + tsdbRowMergerClear(pMerger); + return code; + } +} + +static int32_t buildComposedDataBlockImpl(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, + SBlockData* pBlockData, SLastBlockReader* 
pLastBlockReader) { + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + + TSDBROW *pRow = NULL, *piRow = NULL; + int64_t key = (pBlockData->nRow > 0 && (!pDumpInfo->allDumped)) ? pBlockData->aTSKEY[pDumpInfo->rowIndex] : INT64_MIN; + if (pBlockScanInfo->iter.hasVal) { + pRow = getValidMemRow(&pBlockScanInfo->iter, pBlockScanInfo->delSkyline, pReader); + } + + if (pBlockScanInfo->iiter.hasVal) { + piRow = getValidMemRow(&pBlockScanInfo->iiter, pBlockScanInfo->delSkyline, pReader); + } + + // two levels of mem-table does contain the valid rows + if (pRow != NULL && piRow != NULL) { + return doMergeMultiLevelRows(pReader, pBlockScanInfo, pBlockData, pLastBlockReader); + } + + // imem + file + last block + if (pBlockScanInfo->iiter.hasVal) { + return doMergeBufAndFileRows(pReader, pBlockScanInfo, piRow, &pBlockScanInfo->iiter, key, pLastBlockReader); + } + + // mem + file + last block + if (pBlockScanInfo->iter.hasVal) { + return doMergeBufAndFileRows(pReader, pBlockScanInfo, pRow, &pBlockScanInfo->iter, key, pLastBlockReader); + } + + // files data blocks + last block + return mergeFileBlockAndLastBlock(pReader, pLastBlockReader, key, pBlockScanInfo, pBlockData); +} + +static int32_t loadNeighborIfOverlap(SFileDataBlockInfo* pBlockInfo, STableBlockScanInfo* pBlockScanInfo, + STsdbReader* pReader, bool* loadNeighbor) { + int32_t code = TSDB_CODE_SUCCESS; + int32_t step = ASCENDING_TRAVERSE(pReader->info.order) ? 1 : -1; + int32_t nextIndex = -1; + + *loadNeighbor = false; + + SBrinRecord rec = {0}; + bool hasNeighbor = getNeighborBlockOfSameTable(pBlockInfo, pBlockScanInfo, &nextIndex, pReader->info.order, &rec); + if (!hasNeighbor) { // do nothing + return code; + } + + if (overlapWithNeighborBlock2(pBlockInfo, &rec, pReader->info.order)) { // load next block + SReaderStatus* pStatus = &pReader->status; + SDataBlockIter* pBlockIter = &pStatus->blockIter; + + // 1. 
find the next neighbor block in the scan block list + SFileDataBlockInfo fb = {.uid = pBlockInfo->uid, .tbBlockIdx = nextIndex}; + int32_t neighborIndex = findFileBlockInfoIndex(pBlockIter, &fb); + + // 2. remove it from the scan block list + setFileBlockActiveInBlockIter(pBlockIter, neighborIndex, step); + + // 3. load the neighbor block, and set it to be the currently accessed file data block + code = doLoadFileBlockData(pReader, pBlockIter, &pStatus->fileBlockData, pBlockInfo->uid); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + // 4. check the data values + initBlockDumpInfo(pReader, pBlockIter); + *loadNeighbor = true; + } + + return code; +} + +static void updateComposedBlockInfo(STsdbReader* pReader, double el, STableBlockScanInfo* pBlockScanInfo) { + SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; + + pResBlock->info.id.uid = (pBlockScanInfo != NULL) ? pBlockScanInfo->uid : 0; + pResBlock->info.dataLoad = 1; + blockDataUpdateTsWindow(pResBlock, pReader->suppInfo.slotId[0]); + + setComposedBlockFlag(pReader, true); + + pReader->cost.composedBlocks += 1; + pReader->cost.buildComposedBlockTime += el; +} + +static int32_t buildComposedDataBlock(STsdbReader* pReader) { + int32_t code = TSDB_CODE_SUCCESS; + + SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; + + SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter); + SLastBlockReader* pLastBlockReader = pReader->status.fileIter.pLastBlockReader; + + bool asc = ASCENDING_TRAVERSE(pReader->info.order); + int64_t st = taosGetTimestampUs(); + int32_t step = asc ? 
1 : -1; + double el = 0; + SBrinRecord* pRecord = &pBlockInfo->record; + + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + + STableBlockScanInfo* pBlockScanInfo = NULL; + if (pBlockInfo != NULL) { + if (pReader->pIgnoreTables && taosHashGet(*pReader->pIgnoreTables, &pBlockInfo->uid, sizeof(pBlockInfo->uid))) { + setBlockAllDumped(pDumpInfo, pRecord->lastKey, pReader->info.order); + return code; + } + + pBlockScanInfo = getTableBlockScanInfo(pReader->status.pTableMap, pBlockInfo->uid, pReader->idStr); + if (pBlockScanInfo == NULL) { + goto _end; + } + + pRecord = &pBlockInfo->record; + TSDBKEY keyInBuf = getCurrentKeyInBuf(pBlockScanInfo, pReader); + + // it is a clean block, load it directly + if (isCleanFileDataBlock(pReader, pBlockInfo, pBlockScanInfo, keyInBuf, pLastBlockReader) && + (pRecord->numRow <= pReader->resBlockInfo.capacity)) { + if (asc || (!hasDataInLastBlock(pLastBlockReader))) { + code = copyBlockDataToSDataBlock(pReader); + if (code) { + goto _end; + } + + // record the last key value + pBlockScanInfo->lastKey = asc ? 
pRecord->lastKey : pRecord->firstKey; + goto _end; + } + } + } else { // file blocks not exist + ASSERT(0); + pBlockScanInfo = *pReader->status.pTableIter; + if (pReader->pIgnoreTables && + taosHashGet(*pReader->pIgnoreTables, &pBlockScanInfo->uid, sizeof(pBlockScanInfo->uid))) { + // setBlockAllDumped(pDumpInfo, pBlock->maxKey.ts, pReader->info.order); + return code; + } + } + + SBlockData* pBlockData = &pReader->status.fileBlockData; + + while (1) { + bool hasBlockData = false; + { + while (pBlockData->nRow > 0 && pBlockData->uid == pBlockScanInfo->uid) { + // find the first qualified row in data block + if (isValidFileBlockRow(pBlockData, pDumpInfo, pBlockScanInfo, pReader)) { + hasBlockData = true; + break; + } + + pDumpInfo->rowIndex += step; + + if (pDumpInfo->rowIndex >= pBlockData->nRow || pDumpInfo->rowIndex < 0) { + pBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter); // NOTE: get the new block info + + // continue check for the next file block if the last ts in the current block + // is overlapped with the next neighbor block + bool loadNeighbor = false; + code = loadNeighborIfOverlap(pBlockInfo, pBlockScanInfo, pReader, &loadNeighbor); + if ((!loadNeighbor) || (code != 0)) { + setBlockAllDumped(pDumpInfo, pRecord->lastKey, pReader->info.order); + break; + } + } + } + } + + // no data in last block and block, no need to proceed. 
+ if (hasBlockData == false) { + break; + } + + code = buildComposedDataBlockImpl(pReader, pBlockScanInfo, pBlockData, pLastBlockReader); + if (code) { + goto _end; + } + + // currently loaded file data block is consumed + if ((pBlockData->nRow > 0) && (pDumpInfo->rowIndex >= pBlockData->nRow || pDumpInfo->rowIndex < 0)) { + setBlockAllDumped(pDumpInfo, pRecord->lastKey, pReader->info.order); + break; + } + + if (pResBlock->info.rows >= pReader->resBlockInfo.capacity) { + break; + } + } + +_end: + el = (taosGetTimestampUs() - st) / 1000.0; + updateComposedBlockInfo(pReader, el, pBlockScanInfo); + + if (pResBlock->info.rows > 0) { + tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRIu64 "-%" PRIu64 " rows:%" PRId64 + ", elapsed time:%.2f ms %s", + pReader, pResBlock->info.id.uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, + pResBlock->info.rows, el, pReader->idStr); + } + + return code; +} + +void setComposedBlockFlag(STsdbReader* pReader, bool composed) { pReader->status.composedDataBlock = composed; } + +int32_t getInitialDelIndex(const SArray* pDelSkyline, int32_t order) { + if (pDelSkyline == NULL) { + return 0; + } + + return ASCENDING_TRAVERSE(order) ? 
0 : taosArrayGetSize(pDelSkyline) - 1; +} + +int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, int32_t order, SCostSummary* pCost) { + int32_t code = 0; + int32_t newDelDataInFile = taosArrayGetSize(pBlockScanInfo->pfileDelData); + if (newDelDataInFile == 0 && + ((pBlockScanInfo->delSkyline != NULL) || (TARRAY_SIZE(pBlockScanInfo->pMemDelData) == 0))) { + return code; + } + + int64_t st = taosGetTimestampUs(); + + if (pBlockScanInfo->delSkyline != NULL) { + taosArrayClear(pBlockScanInfo->delSkyline); + } else { + pBlockScanInfo->delSkyline = taosArrayInit(4, sizeof(TSDBKEY)); + } + + SArray* pSource = pBlockScanInfo->pfileDelData; + if (pSource == NULL) { + pSource = pBlockScanInfo->pMemDelData; + } else { + taosArrayAddAll(pSource, pBlockScanInfo->pMemDelData); + } + + code = tsdbBuildDeleteSkyline(pSource, 0, taosArrayGetSize(pSource) - 1, pBlockScanInfo->delSkyline); + + taosArrayClear(pBlockScanInfo->pfileDelData); + int32_t index = getInitialDelIndex(pBlockScanInfo->delSkyline, order); + + pBlockScanInfo->iter.index = index; + pBlockScanInfo->iiter.index = index; + pBlockScanInfo->fileDelIndex = index; + pBlockScanInfo->lastBlockDelIndex = index; + + double el = taosGetTimestampUs() - st; + pCost->createSkylineIterTime = el / 1000.0; + + return code; +} + +TSDBKEY getCurrentKeyInBuf(STableBlockScanInfo* pScanInfo, STsdbReader* pReader) { + bool asc = ASCENDING_TRAVERSE(pReader->info.order); + TSDBKEY key = {.ts = TSKEY_INITIAL_VAL}, ikey = {.ts = TSKEY_INITIAL_VAL}; + + bool hasKey = false, hasIKey = false; + TSDBROW* pRow = getValidMemRow(&pScanInfo->iter, pScanInfo->delSkyline, pReader); + if (pRow != NULL) { + hasKey = true; + key = TSDBROW_KEY(pRow); + } + + TSDBROW* pIRow = getValidMemRow(&pScanInfo->iiter, pScanInfo->delSkyline, pReader); + if (pIRow != NULL) { + hasIKey = true; + ikey = TSDBROW_KEY(pIRow); + } + + if (hasKey) { + if (hasIKey) { // has data in mem & imem + if (asc) { + return key.ts <= ikey.ts ? 
key : ikey; + } else { + return key.ts <= ikey.ts ? ikey : key; + } + } else { // no data in imem + return key; + } + } else { + // no data in mem & imem, return the initial value + // only imem has data, return ikey + return ikey; + } +} + +static int32_t moveToNextFile(STsdbReader* pReader, SBlockNumber* pBlockNum, SArray* pTableList) { + SReaderStatus* pStatus = &pReader->status; + pBlockNum->numOfBlocks = 0; + pBlockNum->numOfLastFiles = 0; + + size_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap); + SArray* pIndexList = taosArrayInit(numOfTables, sizeof(SBrinBlk)); + + while (1) { + // only check here, since the iterate data in memory is very fast. + if (pReader->code != TSDB_CODE_SUCCESS) { + tsdbWarn("tsdb reader is stopped ASAP, code:%s, %s", strerror(pReader->code), pReader->idStr); + return pReader->code; + } + + bool hasNext = false; + int32_t code = filesetIteratorNext(&pStatus->fileIter, pReader, &hasNext); + if (code != TSDB_CODE_SUCCESS) { + taosArrayDestroy(pIndexList); + return code; + } + + if (!hasNext) { // no data files on disk + break; + } + + taosArrayClear(pIndexList); + code = doLoadBlockIndex(pReader, pReader->pFileReader, pIndexList); + if (code != TSDB_CODE_SUCCESS) { + taosArrayDestroy(pIndexList); + return code; + } + + if (taosArrayGetSize(pIndexList) > 0 || pReader->status.pCurrentFileset->lvlArr->size > 0) { + code = doLoadFileBlock(pReader, pIndexList, pBlockNum, pTableList); + if (code != TSDB_CODE_SUCCESS) { + taosArrayDestroy(pIndexList); + return code; + } + + if (pBlockNum->numOfBlocks + pBlockNum->numOfLastFiles > 0) { + break; + } + } + + // no blocks in current file, try next files + } + + taosArrayDestroy(pIndexList); + return loadDataFileTombDataForAll(pReader); +} + +static void resetTableListIndex(SReaderStatus* pStatus) { + STableUidList* pList = &pStatus->uidList; + + pList->currentIndex = 0; + uint64_t uid = pList->tableUidList[0]; + pStatus->pTableIter = tSimpleHashGet(pStatus->pTableMap, &uid, 
sizeof(uid)); +} + +static bool moveToNextTable(STableUidList* pOrderedCheckInfo, SReaderStatus* pStatus) { + pOrderedCheckInfo->currentIndex += 1; + if (pOrderedCheckInfo->currentIndex >= tSimpleHashGetSize(pStatus->pTableMap)) { + pStatus->pTableIter = NULL; + return false; + } + + uint64_t uid = pOrderedCheckInfo->tableUidList[pOrderedCheckInfo->currentIndex]; + pStatus->pTableIter = tSimpleHashGet(pStatus->pTableMap, &uid, sizeof(uid)); + return (pStatus->pTableIter != NULL); +} + +static int32_t doLoadLastBlockSequentially(STsdbReader* pReader) { + SReaderStatus* pStatus = &pReader->status; + SLastBlockReader* pLastBlockReader = pStatus->fileIter.pLastBlockReader; + STableUidList* pUidList = &pStatus->uidList; + int32_t code = TSDB_CODE_SUCCESS; + + if (tSimpleHashGetSize(pStatus->pTableMap) == 0) { + return TSDB_CODE_SUCCESS; + } + + SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; + + while (1) { + if (pReader->code != TSDB_CODE_SUCCESS) { + tsdbWarn("tsdb reader is stopped ASAP, code:%s, %s", strerror(pReader->code), pReader->idStr); + return pReader->code; + } + + // load the last data block of current table + STableBlockScanInfo* pScanInfo = *(STableBlockScanInfo**)pStatus->pTableIter; + if (pReader->pIgnoreTables && taosHashGet(*pReader->pIgnoreTables, &pScanInfo->uid, sizeof(pScanInfo->uid))) { + // reset the index in last block when handing a new file + // doCleanupTableScanInfo(pScanInfo); + bool hasNexTable = moveToNextTable(pUidList, pStatus); + if (!hasNexTable) { + return TSDB_CODE_SUCCESS; + } + + continue; + } + + // reset the index in last block when handing a new file + // doCleanupTableScanInfo(pScanInfo); + + bool hasDataInLastFile = initLastBlockReader(pLastBlockReader, pScanInfo, pReader); + if (!hasDataInLastFile) { + bool hasNexTable = moveToNextTable(pUidList, pStatus); + if (!hasNexTable) { + return TSDB_CODE_SUCCESS; + } + + continue; + } + + int64_t st = taosGetTimestampUs(); + while (1) { + bool hasBlockLData = 
hasDataInLastBlock(pLastBlockReader); + + // no data in last block and block, no need to proceed. + if (hasBlockLData == false) { + break; + } + + code = buildComposedDataBlockImpl(pReader, pScanInfo, &pReader->status.fileBlockData, pLastBlockReader); + if (code) { + return code; + } + + if (pResBlock->info.rows >= pReader->resBlockInfo.capacity) { + break; + } + } + + double el = (taosGetTimestampUs() - st) / 1000.0; + updateComposedBlockInfo(pReader, el, pScanInfo); + + if (pResBlock->info.rows > 0) { + tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRIu64 "-%" PRIu64 " rows:%" PRId64 + ", elapsed time:%.2f ms %s", + pReader, pResBlock->info.id.uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, + pResBlock->info.rows, el, pReader->idStr); + return TSDB_CODE_SUCCESS; + } + + // current table is exhausted, let's try next table + bool hasNexTable = moveToNextTable(pUidList, pStatus); + if (!hasNexTable) { + return TSDB_CODE_SUCCESS; + } + } +} + +static int32_t doBuildDataBlock(STsdbReader* pReader) { + int32_t code = TSDB_CODE_SUCCESS; + + SReaderStatus* pStatus = &pReader->status; + SDataBlockIter* pBlockIter = &pStatus->blockIter; + STableBlockScanInfo* pScanInfo = NULL; + SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(pBlockIter); + SLastBlockReader* pLastBlockReader = pReader->status.fileIter.pLastBlockReader; + + if (pReader->pIgnoreTables && taosHashGet(*pReader->pIgnoreTables, &pBlockInfo->uid, sizeof(pBlockInfo->uid))) { + setBlockAllDumped(&pStatus->fBlockDumpInfo, pBlockInfo->record.lastKey, pReader->info.order); + return code; + } + + if (pReader->code != TSDB_CODE_SUCCESS) { + return pReader->code; + } + + pScanInfo = getTableBlockScanInfo(pReader->status.pTableMap, pBlockInfo->uid, pReader->idStr); + if (pScanInfo == NULL) { + return terrno; + } + + initLastBlockReader(pLastBlockReader, pScanInfo, pReader); + TSDBKEY keyInBuf = getCurrentKeyInBuf(pScanInfo, pReader); + + if (fileBlockShouldLoad(pReader, 
pBlockInfo, pScanInfo, keyInBuf, pLastBlockReader)) { + code = doLoadFileBlockData(pReader, pBlockIter, &pStatus->fileBlockData, pScanInfo->uid); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + // build composed data block + code = buildComposedDataBlock(pReader); + } else if (bufferDataInFileBlockGap(pReader->info.order, keyInBuf, pBlockInfo)) { + // data in memory that are earlier than current file block + // rows in buffer should be less than the file block in asc, greater than file block in desc + int64_t endKey = + (ASCENDING_TRAVERSE(pReader->info.order)) ? pBlockInfo->record.firstKey : pBlockInfo->record.lastKey; + code = buildDataBlockFromBuf(pReader, pScanInfo, endKey); + } else { + if (hasDataInLastBlock(pLastBlockReader) && !ASCENDING_TRAVERSE(pReader->info.order)) { + // only return the rows in last block + int64_t tsLast = getCurrentKeyInLastBlock(pLastBlockReader); + ASSERT(tsLast >= pBlockInfo->record.lastKey); + + SBlockData* pBData = &pReader->status.fileBlockData; + tBlockDataReset(pBData); + + SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; + tsdbDebug("load data in last block firstly, due to desc scan data, %s", pReader->idStr); + + int64_t st = taosGetTimestampUs(); + + while (1) { + bool hasBlockLData = hasDataInLastBlock(pLastBlockReader); + + // no data in last block and block, no need to proceed. 
+ if (hasBlockLData == false) { + break; + } + + code = buildComposedDataBlockImpl(pReader, pScanInfo, &pReader->status.fileBlockData, pLastBlockReader); + if (code) { + return code; + } + + if (pResBlock->info.rows >= pReader->resBlockInfo.capacity) { + break; + } + } + + double el = (taosGetTimestampUs() - st) / 1000.0; + updateComposedBlockInfo(pReader, el, pScanInfo); + + if (pResBlock->info.rows > 0) { + tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRIu64 "-%" PRIu64 " rows:%" PRId64 + ", elapsed time:%.2f ms %s", + pReader, pResBlock->info.id.uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, + pResBlock->info.rows, el, pReader->idStr); + } + } else { // whole block is required, return it directly + SDataBlockInfo* pInfo = &pReader->resBlockInfo.pResBlock->info; + pInfo->rows = pBlockInfo->record.numRow; + pInfo->id.uid = pScanInfo->uid; + pInfo->dataLoad = 0; + pInfo->window = (STimeWindow){.skey = pBlockInfo->record.firstKey, .ekey = pBlockInfo->record.lastKey}; + setComposedBlockFlag(pReader, false); + setBlockAllDumped(&pStatus->fBlockDumpInfo, pBlockInfo->record.lastKey, pReader->info.order); + + // update the last key for the corresponding table + pScanInfo->lastKey = ASCENDING_TRAVERSE(pReader->info.order) ? pInfo->window.ekey : pInfo->window.skey; + tsdbDebug("%p uid:%" PRIu64 + " clean file block retrieved from file, global index:%d, " + "table index:%d, rows:%d, brange:%" PRId64 "-%" PRId64 ", %s", + pReader, pScanInfo->uid, pBlockIter->index, pBlockInfo->tbBlockIdx, pBlockInfo->record.numRow, + pBlockInfo->record.firstKey, pBlockInfo->record.lastKey, pReader->idStr); + } + } + + return (pReader->code != TSDB_CODE_SUCCESS) ? 
pReader->code : code; +} + +static int32_t doSumFileBlockRows(STsdbReader* pReader, SDataFReader* pFileReader) { + int64_t st = taosGetTimestampUs(); + LRUHandle* handle = NULL; + int32_t code = tsdbCacheGetBlockIdx(pFileReader->pTsdb->biCache, pFileReader, &handle); + if (code != TSDB_CODE_SUCCESS || handle == NULL) { + goto _end; + } + + int32_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap); + + SArray* aBlockIdx = (SArray*)taosLRUCacheValue(pFileReader->pTsdb->biCache, handle); + size_t num = taosArrayGetSize(aBlockIdx); + if (num == 0) { + tsdbBICacheRelease(pFileReader->pTsdb->biCache, handle); + return TSDB_CODE_SUCCESS; + } + + SBlockIdx* pBlockIdx = NULL; + for (int32_t i = 0; i < num; ++i) { + pBlockIdx = (SBlockIdx*)taosArrayGet(aBlockIdx, i); + if (pBlockIdx->suid != pReader->info.suid) { + continue; + } + + STableBlockScanInfo** p = tSimpleHashGet(pReader->status.pTableMap, &pBlockIdx->uid, sizeof(pBlockIdx->uid)); + if (p == NULL) { + continue; + } + + STableBlockScanInfo* pScanInfo = *p; + SDataBlk block = {0}; + // for (int32_t j = 0; j < pScanInfo->mapData.nItem; ++j) { + // tGetDataBlk(pScanInfo->mapData.pData + pScanInfo->mapData.aOffset[j], &block); + // pReader->rowsNum += block.nRow; + // } + } + +_end: + tsdbBICacheRelease(pFileReader->pTsdb->biCache, handle); + return code; +} + +static int32_t doSumSttBlockRows(STsdbReader* pReader) { + int32_t code = TSDB_CODE_SUCCESS; + SLastBlockReader* pLastBlockReader = pReader->status.fileIter.pLastBlockReader; + SSttBlockLoadInfo* pBlockLoadInfo = NULL; +#if 0 + for (int32_t i = 0; i < pReader->pFileReader->pSet->nSttF; ++i) { // open all last file + pBlockLoadInfo = &pLastBlockReader->pInfo[i]; + + code = tsdbReadSttBlk(pReader->pFileReader, i, pBlockLoadInfo->aSttBlk); + if (code) { + return code; + } + + size_t size = taosArrayGetSize(pBlockLoadInfo->aSttBlk); + if (size >= 1) { + SSttBlk* pStart = taosArrayGet(pBlockLoadInfo->aSttBlk, 0); + SSttBlk* pEnd = 
taosArrayGet(pBlockLoadInfo->aSttBlk, size - 1); + + // all identical + if (pStart->suid == pEnd->suid) { + if (pStart->suid != pReader->info.suid) { + // no qualified stt block existed + taosArrayClear(pBlockLoadInfo->aSttBlk); + continue; + } + for (int32_t j = 0; j < size; ++j) { + SSttBlk* p = taosArrayGet(pBlockLoadInfo->aSttBlk, j); + pReader->rowsNum += p->nRow; + } + } else { + for (int32_t j = 0; j < size; ++j) { + SSttBlk* p = taosArrayGet(pBlockLoadInfo->aSttBlk, j); + uint64_t s = p->suid; + if (s < pReader->info.suid) { + continue; + } + + if (s == pReader->info.suid) { + pReader->rowsNum += p->nRow; + } else if (s > pReader->info.suid) { + break; + } + } + } + } + } +#endif + + return code; +} + +static int32_t readRowsCountFromFiles(STsdbReader* pReader) { + int32_t code = TSDB_CODE_SUCCESS; + + while (1) { + bool hasNext = false; + code = filesetIteratorNext(&pReader->status.fileIter, pReader, &hasNext); + if (code) { + return code; + } + + if (!hasNext) { // no data files on disk + break; + } + + // code = doSumFileBlockRows(pReader, pReader->pFileReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + code = doSumSttBlockRows(pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + + pReader->status.loadFromFile = false; + + return code; +} + +static int32_t readRowsCountFromMem(STsdbReader* pReader) { + int32_t code = TSDB_CODE_SUCCESS; + int64_t memNum = 0, imemNum = 0; + if (pReader->pReadSnap->pMem != NULL) { + tsdbMemTableCountRows(pReader->pReadSnap->pMem, pReader->status.pTableMap, &memNum); + } + + if (pReader->pReadSnap->pIMem != NULL) { + tsdbMemTableCountRows(pReader->pReadSnap->pIMem, pReader->status.pTableMap, &imemNum); + } + + pReader->rowsNum += memNum + imemNum; + + return code; +} + +static int32_t buildBlockFromBufferSequentially(STsdbReader* pReader) { + SReaderStatus* pStatus = &pReader->status; + STableUidList* pUidList = &pStatus->uidList; + + while (1) { + if (pReader->code != TSDB_CODE_SUCCESS) { + 
tsdbWarn("tsdb reader is stopped ASAP, code:%s, %s", strerror(pReader->code), pReader->idStr); + return pReader->code; + } + + STableBlockScanInfo** pBlockScanInfo = pStatus->pTableIter; + if (pReader->pIgnoreTables && + taosHashGet(*pReader->pIgnoreTables, &(*pBlockScanInfo)->uid, sizeof((*pBlockScanInfo)->uid))) { + bool hasNexTable = moveToNextTable(pUidList, pStatus); + if (!hasNexTable) { + return TSDB_CODE_SUCCESS; + } + pBlockScanInfo = pStatus->pTableIter; + } + + initMemDataIterator(*pBlockScanInfo, pReader); + initDelSkylineIterator(*pBlockScanInfo, pReader->info.order, &pReader->cost); + + int64_t endKey = (ASCENDING_TRAVERSE(pReader->info.order)) ? INT64_MAX : INT64_MIN; + int32_t code = buildDataBlockFromBuf(pReader, *pBlockScanInfo, endKey); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + if (pReader->resBlockInfo.pResBlock->info.rows > 0) { + return TSDB_CODE_SUCCESS; + } + + // current table is exhausted, let's try next table + bool hasNexTable = moveToNextTable(pUidList, pStatus); + if (!hasNexTable) { + return TSDB_CODE_SUCCESS; + } + } +} + +// set the correct start position in case of the first/last file block, according to the query time window +static void initBlockDumpInfo(STsdbReader* pReader, SDataBlockIter* pBlockIter) { + int64_t lastKey = ASCENDING_TRAVERSE(pReader->info.order) ? INT64_MIN : INT64_MAX; + SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(pBlockIter); + SReaderStatus* pStatus = &pReader->status; + SFileBlockDumpInfo* pDumpInfo = &pStatus->fBlockDumpInfo; + + if (pBlockInfo) { + STableBlockScanInfo* pScanInfo = tSimpleHashGet(pBlockIter->pTableMap, &pBlockInfo->uid, sizeof(pBlockInfo->uid)); + if (pScanInfo) { + lastKey = pScanInfo->lastKey; + } + + pDumpInfo->totalRows = pBlockInfo->record.numRow; + pDumpInfo->rowIndex = ASCENDING_TRAVERSE(pReader->info.order) ? 
0 : pBlockInfo->record.numRow - 1; + } else { + pDumpInfo->totalRows = 0; + pDumpInfo->rowIndex = 0; + } + + pDumpInfo->allDumped = false; + pDumpInfo->lastKey = lastKey; +} + +static int32_t initForFirstBlockInFile(STsdbReader* pReader, SDataBlockIter* pBlockIter) { + SBlockNumber num = {0}; + SArray* pTableList = taosArrayInit(40, POINTER_BYTES); + + int32_t code = moveToNextFile(pReader, &num, pTableList); + if (code != TSDB_CODE_SUCCESS) { + taosArrayDestroy(pTableList); + return code; + } + + // all data files are consumed, try data in buffer + if (num.numOfBlocks + num.numOfLastFiles == 0) { + pReader->status.loadFromFile = false; + taosArrayDestroy(pTableList); + return code; + } + + // initialize the block iterator for a new fileset + if (num.numOfBlocks > 0) { + code = initBlockIterator(pReader, pBlockIter, num.numOfBlocks, pTableList); + } else { // no block data, only last block exists + tBlockDataReset(&pReader->status.fileBlockData); + resetDataBlockIterator(pBlockIter, pReader->info.order); + resetTableListIndex(&pReader->status); + } + + // set the correct start position according to the query time window + initBlockDumpInfo(pReader, pBlockIter); + taosArrayDestroy(pTableList); + return code; +} + +static bool fileBlockPartiallyRead(SFileBlockDumpInfo* pDumpInfo, bool asc) { + return (!pDumpInfo->allDumped) && + ((pDumpInfo->rowIndex > 0 && asc) || (pDumpInfo->rowIndex < (pDumpInfo->totalRows - 1) && (!asc))); +} + +typedef enum { + TSDB_READ_RETURN = 0x1, + TSDB_READ_CONTINUE = 0x2, +} ERetrieveType; + +static ERetrieveType doReadDataFromLastFiles(STsdbReader* pReader) { + int32_t code = TSDB_CODE_SUCCESS; + SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; + SDataBlockIter* pBlockIter = &pReader->status.blockIter; + + while (1) { + terrno = 0; + + code = doLoadLastBlockSequentially(pReader); + if (code != TSDB_CODE_SUCCESS) { + terrno = code; + return TSDB_READ_RETURN; + } + + if (pResBlock->info.rows > 0) { + return TSDB_READ_RETURN; + } + 
+ // all data blocks are checked in this last block file, now let's try the next file + ASSERT(pReader->status.pTableIter == NULL); + code = initForFirstBlockInFile(pReader, pBlockIter); + + // error happens or all the data files are completely checked + if ((code != TSDB_CODE_SUCCESS) || (pReader->status.loadFromFile == false)) { + terrno = code; + return TSDB_READ_RETURN; + } + + if (pBlockIter->numOfBlocks > 0) { // there are data blocks existed. + return TSDB_READ_CONTINUE; + } else { // all blocks in data file are checked, let's check the data in last files + resetTableListIndex(&pReader->status); + } + } +} + +static int32_t buildBlockFromFiles(STsdbReader* pReader) { + int32_t code = TSDB_CODE_SUCCESS; + bool asc = ASCENDING_TRAVERSE(pReader->info.order); + + SDataBlockIter* pBlockIter = &pReader->status.blockIter; + SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; + + if (pBlockIter->numOfBlocks == 0) { + // let's try to extract data from stt files. + ERetrieveType type = doReadDataFromLastFiles(pReader); + if (type == TSDB_READ_RETURN) { + return terrno; + } + + code = doBuildDataBlock(pReader); + if (code != TSDB_CODE_SUCCESS || pResBlock->info.rows > 0) { + return code; + } + } + + while (1) { + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + + if (fileBlockPartiallyRead(pDumpInfo, asc)) { // file data block is partially loaded + code = buildComposedDataBlock(pReader); + } else { + // current block are exhausted, try the next file block + if (pDumpInfo->allDumped) { + // try next data block in current file + bool hasNext = blockIteratorNext(&pReader->status.blockIter, pReader->idStr); + if (hasNext) { // check for the next block in the block accessed order list + initBlockDumpInfo(pReader, pBlockIter); + } else { + // all data blocks in files are checked, let's check the data in last files. 
+ // data blocks in current file are exhausted, let's try the next file now + SBlockData* pBlockData = &pReader->status.fileBlockData; + if (pBlockData->uid != 0) { + tBlockDataClear(pBlockData); + } + + tBlockDataReset(pBlockData); + resetDataBlockIterator(pBlockIter, pReader->info.order); + resetTableListIndex(&pReader->status); + + ERetrieveType type = doReadDataFromLastFiles(pReader); + if (type == TSDB_READ_RETURN) { + return terrno; + } + } + } + + code = doBuildDataBlock(pReader); + } + + if (code != TSDB_CODE_SUCCESS || pResBlock->info.rows > 0) { + return code; + } + } +} + +static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idStr, + int8_t* pLevel) { + if (VND_IS_RSMA(pVnode)) { + int8_t level = 0; + int8_t precision = pVnode->config.tsdbCfg.precision; + int64_t now = taosGetTimestamp(precision); + int64_t offset = tsQueryRsmaTolerance * ((precision == TSDB_TIME_PRECISION_MILLI) ? 1L + : (precision == TSDB_TIME_PRECISION_MICRO) ? 1000L + : 1000000L); + + for (int8_t i = 0; i < TSDB_RETENTION_MAX; ++i) { + SRetention* pRetention = retentions + level; + if (pRetention->keep <= 0) { + if (level > 0) { + --level; + } + break; + } + if ((now - pRetention->keep) <= (winSKey + offset)) { + break; + } + ++level; + } + + const char* str = (idStr != NULL) ? 
idStr : ""; + + if (level == TSDB_RETENTION_L0) { + *pLevel = TSDB_RETENTION_L0; + tsdbDebug("vgId:%d, rsma level %d is selected to query %s", TD_VID(pVnode), TSDB_RETENTION_L0, str); + return VND_RSMA0(pVnode); + } else if (level == TSDB_RETENTION_L1) { + *pLevel = TSDB_RETENTION_L1; + tsdbDebug("vgId:%d, rsma level %d is selected to query %s", TD_VID(pVnode), TSDB_RETENTION_L1, str); + return VND_RSMA1(pVnode); + } else { + *pLevel = TSDB_RETENTION_L2; + tsdbDebug("vgId:%d, rsma level %d is selected to query %s", TD_VID(pVnode), TSDB_RETENTION_L2, str); + return VND_RSMA2(pVnode); + } + } + + return VND_TSDB(pVnode); +} + +SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level) { + int64_t startVer = (pCond->startVersion == -1) ? 0 : pCond->startVersion; + + int64_t endVer = 0; + if (pCond->endVersion == -1) { + // user not specified end version, set current maximum version of vnode as the endVersion + endVer = pVnode->state.applied; + } else { + endVer = (pCond->endVersion > pVnode->state.applied) ? pVnode->state.applied : pCond->endVersion; + } + + return (SVersionRange){.minVer = startVer, .maxVer = endVer}; +} + +bool hasBeenDropped(const SArray* pDelList, int32_t* index, int64_t key, int64_t ver, int32_t order, + SVersionRange* pVerRange) { + if (pDelList == NULL || (taosArrayGetSize(pDelList) == 0)) { + return false; + } + + size_t num = taosArrayGetSize(pDelList); + bool asc = ASCENDING_TRAVERSE(order); + int32_t step = asc ? 
1 : -1; + + if (asc) { + if (*index >= num - 1) { + TSDBKEY* last = taosArrayGetLast(pDelList); + ASSERT(key >= last->ts); + + if (key > last->ts) { + return false; + } else if (key == last->ts) { + TSDBKEY* prev = taosArrayGet(pDelList, num - 2); + return (prev->version >= ver && prev->version <= pVerRange->maxVer && prev->version >= pVerRange->minVer); + } + } else { + TSDBKEY* pCurrent = taosArrayGet(pDelList, *index); + TSDBKEY* pNext = taosArrayGet(pDelList, (*index) + 1); + + if (key < pCurrent->ts) { + return false; + } + + if (pCurrent->ts <= key && pNext->ts >= key && pCurrent->version >= ver && + pVerRange->maxVer >= pCurrent->version) { + return true; + } + + while (pNext->ts <= key && (*index) < num - 1) { + (*index) += 1; + + if ((*index) < num - 1) { + pCurrent = taosArrayGet(pDelList, *index); + pNext = taosArrayGet(pDelList, (*index) + 1); + + // it is not a consecutive deletion range, ignore it + if (pCurrent->version == 0 && pNext->version > 0) { + continue; + } + + if (pCurrent->ts <= key && pNext->ts >= key && pCurrent->version >= ver && + pVerRange->maxVer >= pCurrent->version) { + return true; + } + } + } + + return false; + } + } else { + if (*index <= 0) { + TSDBKEY* pFirst = taosArrayGet(pDelList, 0); + + if (key < pFirst->ts) { + return false; + } else if (key == pFirst->ts) { + return pFirst->version >= ver; + } else { + ASSERT(0); + } + } else { + TSDBKEY* pCurrent = taosArrayGet(pDelList, *index); + TSDBKEY* pPrev = taosArrayGet(pDelList, (*index) - 1); + + if (key > pCurrent->ts) { + return false; + } + + if (pPrev->ts <= key && pCurrent->ts >= key && pPrev->version >= ver) { + return true; + } + + while (pPrev->ts >= key && (*index) > 1) { + (*index) += step; + + if ((*index) >= 1) { + pCurrent = taosArrayGet(pDelList, *index); + pPrev = taosArrayGet(pDelList, (*index) - 1); + + // it is not a consecutive deletion range, ignore it + if (pCurrent->version > 0 && pPrev->version == 0) { + continue; + } + + if (pPrev->ts <= key && 
pCurrent->ts >= key && pPrev->version >= ver) { + return true; + } + } + } + + return false; + } + } + + return false; +} + +TSDBROW* getValidMemRow(SIterInfo* pIter, const SArray* pDelList, STsdbReader* pReader) { + if (!pIter->hasVal) { + return NULL; + } + + TSDBROW* pRow = tsdbTbDataIterGet(pIter->iter); + TSDBKEY key = TSDBROW_KEY(pRow); + + if (outOfTimeWindow(key.ts, &pReader->info.window)) { + pIter->hasVal = false; + return NULL; + } + + // it is a valid data version + if ((key.version <= pReader->info.verRange.maxVer && key.version >= pReader->info.verRange.minVer) && + (!hasBeenDropped(pDelList, &pIter->index, key.ts, key.version, pReader->info.order, &pReader->info.verRange))) { + return pRow; + } + + while (1) { + pIter->hasVal = tsdbTbDataIterNext(pIter->iter); + if (!pIter->hasVal) { + return NULL; + } + + pRow = tsdbTbDataIterGet(pIter->iter); + + key = TSDBROW_KEY(pRow); + if (outOfTimeWindow(key.ts, &pReader->info.window)) { + pIter->hasVal = false; + return NULL; + } + + if (key.version <= pReader->info.verRange.maxVer && key.version >= pReader->info.verRange.minVer && + (!hasBeenDropped(pDelList, &pIter->index, key.ts, key.version, pReader->info.order, &pReader->info.verRange))) { + return pRow; + } + } +} + +int32_t doMergeRowsInBuf(SIterInfo* pIter, uint64_t uid, int64_t ts, SArray* pDelList, STsdbReader* pReader) { + SRowMerger* pMerger = &pReader->status.merger; + + while (1) { + pIter->hasVal = tsdbTbDataIterNext(pIter->iter); + if (!pIter->hasVal) { + break; + } + + // data exists but not valid + TSDBROW* pRow = getValidMemRow(pIter, pDelList, pReader); + if (pRow == NULL) { + break; + } + + // ts is not identical, quit + TSDBKEY k = TSDBROW_KEY(pRow); + if (k.ts != ts) { + break; + } + + if (pRow->type == TSDBROW_ROW_FMT) { + STSchema* pTSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, uid); + if (pTSchema == NULL) { + return terrno; + } + + tsdbRowMergerAdd(pMerger, pRow, pTSchema); + } else { // column format + 
tsdbRowMergerAdd(pMerger, pRow, NULL); + } + } + + return TSDB_CODE_SUCCESS; +} + +static int32_t doMergeRowsInFileBlockImpl(SBlockData* pBlockData, int32_t rowIndex, int64_t key, SRowMerger* pMerger, + SVersionRange* pVerRange, int32_t step) { + while (rowIndex < pBlockData->nRow && rowIndex >= 0 && pBlockData->aTSKEY[rowIndex] == key) { + if (pBlockData->aVersion[rowIndex] > pVerRange->maxVer || pBlockData->aVersion[rowIndex] < pVerRange->minVer) { + rowIndex += step; + continue; + } + + TSDBROW fRow = tsdbRowFromBlockData(pBlockData, rowIndex); + tsdbRowMergerAdd(pMerger, &fRow, NULL); + rowIndex += step; + } + + return rowIndex; +} + +typedef enum { + CHECK_FILEBLOCK_CONT = 0x1, + CHECK_FILEBLOCK_QUIT = 0x2, +} CHECK_FILEBLOCK_STATE; + +static int32_t checkForNeighborFileBlock(STsdbReader* pReader, STableBlockScanInfo* pScanInfo, + SFileDataBlockInfo* pFBlock, SRowMerger* pMerger, int64_t key, + CHECK_FILEBLOCK_STATE* state) { + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + SBlockData* pBlockData = &pReader->status.fileBlockData; + bool asc = ASCENDING_TRAVERSE(pReader->info.order); + + *state = CHECK_FILEBLOCK_QUIT; + int32_t step = ASCENDING_TRAVERSE(pReader->info.order) ? 
1 : -1; + + bool loadNeighbor = true; + int32_t code = loadNeighborIfOverlap(pFBlock, pScanInfo, pReader, &loadNeighbor); + + if (loadNeighbor && (code == TSDB_CODE_SUCCESS)) { + pDumpInfo->rowIndex = + doMergeRowsInFileBlockImpl(pBlockData, pDumpInfo->rowIndex, key, pMerger, &pReader->info.verRange, step); + if ((pDumpInfo->rowIndex >= pDumpInfo->totalRows && asc) || (pDumpInfo->rowIndex < 0 && !asc)) { + *state = CHECK_FILEBLOCK_CONT; + } + } + + return code; +} + +int32_t doMergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pScanInfo, STsdbReader* pReader) { + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + + SRowMerger* pMerger = &pReader->status.merger; + bool asc = ASCENDING_TRAVERSE(pReader->info.order); + int64_t key = pBlockData->aTSKEY[pDumpInfo->rowIndex]; + int32_t step = asc ? 1 : -1; + + pDumpInfo->rowIndex += step; + if ((pDumpInfo->rowIndex <= pBlockData->nRow - 1 && asc) || (pDumpInfo->rowIndex >= 0 && !asc)) { + pDumpInfo->rowIndex = + doMergeRowsInFileBlockImpl(pBlockData, pDumpInfo->rowIndex, key, pMerger, &pReader->info.verRange, step); + } + + // all rows are consumed, let's try next file block + if ((pDumpInfo->rowIndex >= pBlockData->nRow && asc) || (pDumpInfo->rowIndex < 0 && !asc)) { + while (1) { + CHECK_FILEBLOCK_STATE st; + + SFileDataBlockInfo* pFileBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter); + if (pFileBlockInfo == NULL) { + st = CHECK_FILEBLOCK_QUIT; + break; + } + + checkForNeighborFileBlock(pReader, pScanInfo, pFileBlockInfo, pMerger, key, &st); + if (st == CHECK_FILEBLOCK_QUIT) { + break; + } + } + } + + return TSDB_CODE_SUCCESS; +} + +int32_t doMergeRowsInLastBlock(SLastBlockReader* pLastBlockReader, STableBlockScanInfo* pScanInfo, int64_t ts, + SRowMerger* pMerger, SVersionRange* pVerRange, const char* idStr) { + while (nextRowFromLastBlocks(pLastBlockReader, pScanInfo, pVerRange)) { + int64_t next1 = getCurrentKeyInLastBlock(pLastBlockReader); + if (next1 == ts) { + TSDBROW* 
pRow1 = tMergeTreeGetRow(&pLastBlockReader->mergeTree); + tsdbRowMergerAdd(pMerger, pRow1, NULL); + } else { + tsdbTrace("uid:%" PRIu64 " last del index:%d, del range:%d, lastKeyInStt:%" PRId64 ", %s", pScanInfo->uid, + pScanInfo->lastBlockDelIndex, (int32_t)taosArrayGetSize(pScanInfo->delSkyline), pScanInfo->lastKeyInStt, + idStr); + break; + } + } + + return TSDB_CODE_SUCCESS; +} + +int32_t doMergeMemTableMultiRows(TSDBROW* pRow, uint64_t uid, SIterInfo* pIter, SArray* pDelList, TSDBROW* pResRow, + STsdbReader* pReader, bool* freeTSRow) { + TSDBROW* pNextRow = NULL; + TSDBROW current = *pRow; + + { // if the timestamp of the next valid row has a different ts, return current row directly + pIter->hasVal = tsdbTbDataIterNext(pIter->iter); + + if (!pIter->hasVal) { + *pResRow = *pRow; + *freeTSRow = false; + return TSDB_CODE_SUCCESS; + } else { // has next point in mem/imem + pNextRow = getValidMemRow(pIter, pDelList, pReader); + if (pNextRow == NULL) { + *pResRow = current; + *freeTSRow = false; + return TSDB_CODE_SUCCESS; + } + + if (TSDBROW_TS(¤t) != TSDBROW_TS(pNextRow)) { + *pResRow = current; + *freeTSRow = false; + return TSDB_CODE_SUCCESS; + } + } + } + + terrno = 0; + int32_t code = 0; + + // start to merge duplicated rows + if (current.type == TSDBROW_ROW_FMT) { + // get the correct schema for data in memory + STSchema* pTSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(¤t), pReader, uid); + if (pTSchema == NULL) { + return terrno; + } + + code = tsdbRowMergerAdd(&pReader->status.merger, ¤t, pTSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + STSchema* pTSchema1 = doGetSchemaForTSRow(TSDBROW_SVERSION(pNextRow), pReader, uid); + if (pTSchema1 == NULL) { + return terrno; + } + + tsdbRowMergerAdd(&pReader->status.merger, pNextRow, pTSchema1); + } else { // let's merge rows in file block + code = tsdbRowMergerAdd(&pReader->status.merger, ¤t, pReader->info.pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + 
tsdbRowMergerAdd(&pReader->status.merger, pNextRow, NULL); + } + + code = doMergeRowsInBuf(pIter, uid, TSDBROW_TS(¤t), pDelList, pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + code = tsdbRowMergerGetRow(&pReader->status.merger, &pResRow->pTSRow); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + pResRow->type = TSDBROW_ROW_FMT; + tsdbRowMergerClear(&pReader->status.merger); + *freeTSRow = true; + + return TSDB_CODE_SUCCESS; +} + +int32_t doMergeMemIMemRows(TSDBROW* pRow, TSDBROW* piRow, STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader, + SRow** pTSRow) { + SRowMerger* pMerger = &pReader->status.merger; + + TSDBKEY k = TSDBROW_KEY(pRow); + TSDBKEY ik = TSDBROW_KEY(piRow); + STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); + if (pSchema == NULL) { + return terrno; + } + + STSchema* piSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(piRow), pReader, pBlockScanInfo->uid); + if (piSchema == NULL) { + return terrno; + } + + if (ASCENDING_TRAVERSE(pReader->info.order)) { // ascending order imem --> mem + int32_t code = tsdbRowMergerAdd(&pReader->status.merger, piRow, piSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + code = doMergeRowsInBuf(&pBlockScanInfo->iiter, pBlockScanInfo->uid, ik.ts, pBlockScanInfo->delSkyline, pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + tsdbRowMergerAdd(&pReader->status.merger, pRow, pSchema); + code = doMergeRowsInBuf(&pBlockScanInfo->iter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + } else { + int32_t code = tsdbRowMergerAdd(&pReader->status.merger, pRow, pSchema); + if (code != TSDB_CODE_SUCCESS || pMerger->pTSchema == NULL) { + return code; + } + + code = doMergeRowsInBuf(&pBlockScanInfo->iter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + 
tsdbRowMergerAdd(&pReader->status.merger, piRow, piSchema); + code = doMergeRowsInBuf(&pBlockScanInfo->iiter, pBlockScanInfo->uid, ik.ts, pBlockScanInfo->delSkyline, pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + + int32_t code = tsdbRowMergerGetRow(pMerger, pTSRow); + tsdbRowMergerClear(pMerger); + return code; +} + +static int32_t tsdbGetNextRowInMem(STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader, TSDBROW* pResRow, + int64_t endKey, bool* freeTSRow) { + TSDBROW* pRow = getValidMemRow(&pBlockScanInfo->iter, pBlockScanInfo->delSkyline, pReader); + TSDBROW* piRow = getValidMemRow(&pBlockScanInfo->iiter, pBlockScanInfo->delSkyline, pReader); + SArray* pDelList = pBlockScanInfo->delSkyline; + uint64_t uid = pBlockScanInfo->uid; + + // todo refactor + bool asc = ASCENDING_TRAVERSE(pReader->info.order); + if (pBlockScanInfo->iter.hasVal) { + TSDBKEY k = TSDBROW_KEY(pRow); + if ((k.ts >= endKey && asc) || (k.ts <= endKey && !asc)) { + pRow = NULL; + } + } + + if (pBlockScanInfo->iiter.hasVal) { + TSDBKEY k = TSDBROW_KEY(piRow); + if ((k.ts >= endKey && asc) || (k.ts <= endKey && !asc)) { + piRow = NULL; + } + } + + if (pBlockScanInfo->iter.hasVal && pBlockScanInfo->iiter.hasVal && pRow != NULL && piRow != NULL) { + TSDBKEY k = TSDBROW_KEY(pRow); + TSDBKEY ik = TSDBROW_KEY(piRow); + + int32_t code = TSDB_CODE_SUCCESS; + if (ik.ts != k.ts) { + if (((ik.ts < k.ts) && asc) || ((ik.ts > k.ts) && (!asc))) { // ik.ts < k.ts + code = doMergeMemTableMultiRows(piRow, uid, &pBlockScanInfo->iiter, pDelList, pResRow, pReader, freeTSRow); + } else if (((k.ts < ik.ts) && asc) || ((k.ts > ik.ts) && (!asc))) { + code = doMergeMemTableMultiRows(pRow, uid, &pBlockScanInfo->iter, pDelList, pResRow, pReader, freeTSRow); + } + } else { // ik.ts == k.ts + *freeTSRow = true; + pResRow->type = TSDBROW_ROW_FMT; + code = doMergeMemIMemRows(pRow, piRow, pBlockScanInfo, pReader, &pResRow->pTSRow); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + + return 
code; + } + + if (pBlockScanInfo->iter.hasVal && pRow != NULL) { + return doMergeMemTableMultiRows(pRow, pBlockScanInfo->uid, &pBlockScanInfo->iter, pDelList, pResRow, pReader, + freeTSRow); + } + + if (pBlockScanInfo->iiter.hasVal && piRow != NULL) { + return doMergeMemTableMultiRows(piRow, uid, &pBlockScanInfo->iiter, pDelList, pResRow, pReader, freeTSRow); + } + + return TSDB_CODE_SUCCESS; +} + +int32_t doAppendRowFromTSRow(SSDataBlock* pBlock, STsdbReader* pReader, SRow* pTSRow, STableBlockScanInfo* pScanInfo) { + int32_t outputRowIndex = pBlock->info.rows; + int64_t uid = pScanInfo->uid; + int32_t code = TSDB_CODE_SUCCESS; + + SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo; + STSchema* pSchema = doGetSchemaForTSRow(pTSRow->sver, pReader, uid); + if (pSchema == NULL) { + return terrno; + } + + SColVal colVal = {0}; + int32_t i = 0, j = 0; + + if (pSupInfo->colId[i] == PRIMARYKEY_TIMESTAMP_COL_ID) { + SColumnInfoData* pColData = taosArrayGet(pBlock->pDataBlock, pSupInfo->slotId[i]); + ((int64_t*)pColData->pData)[outputRowIndex] = pTSRow->ts; + i += 1; + } + + while (i < pSupInfo->numOfCols && j < pSchema->numOfCols) { + col_id_t colId = pSupInfo->colId[i]; + + if (colId == pSchema->columns[j].colId) { + SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, pSupInfo->slotId[i]); + + tRowGet(pTSRow, pSchema, j, &colVal); + code = doCopyColVal(pColInfoData, outputRowIndex, i, &colVal, pSupInfo); + if (code) { + return code; + } + i += 1; + j += 1; + } else if (colId < pSchema->columns[j].colId) { + SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, pSupInfo->slotId[i]); + + colDataSetNULL(pColInfoData, outputRowIndex); + i += 1; + } else if (colId > pSchema->columns[j].colId) { + j += 1; + } + } + + // set null value since current column does not exist in the "pSchema" + while (i < pSupInfo->numOfCols) { + SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, pSupInfo->slotId[i]); + colDataSetNULL(pColInfoData, 
outputRowIndex); + i += 1; + } + + pBlock->info.dataLoad = 1; + pBlock->info.rows += 1; + pScanInfo->lastKey = pTSRow->ts; + return TSDB_CODE_SUCCESS; +} + +int32_t doAppendRowFromFileBlock(SSDataBlock* pResBlock, STsdbReader* pReader, SBlockData* pBlockData, + int32_t rowIndex) { + int32_t i = 0, j = 0; + int32_t outputRowIndex = pResBlock->info.rows; + int32_t code = TSDB_CODE_SUCCESS; + + SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo; + ((int64_t*)pReader->status.pPrimaryTsCol->pData)[outputRowIndex] = pBlockData->aTSKEY[rowIndex]; + i += 1; + + SColVal cv = {0}; + int32_t numOfInputCols = pBlockData->nColData; + int32_t numOfOutputCols = pSupInfo->numOfCols; + + while (i < numOfOutputCols && j < numOfInputCols) { + SColData* pData = tBlockDataGetColDataByIdx(pBlockData, j); + if (pData->cid < pSupInfo->colId[i]) { + j += 1; + continue; + } + + SColumnInfoData* pCol = TARRAY_GET_ELEM(pResBlock->pDataBlock, pSupInfo->slotId[i]); + if (pData->cid == pSupInfo->colId[i]) { + tColDataGetValue(pData, rowIndex, &cv); + code = doCopyColVal(pCol, outputRowIndex, i, &cv, pSupInfo); + if (code) { + return code; + } + j += 1; + } else if (pData->cid > pCol->info.colId) { + // the specified column does not exist in file block, fill with null data + colDataSetNULL(pCol, outputRowIndex); + } + + i += 1; + } + + while (i < numOfOutputCols) { + SColumnInfoData* pCol = taosArrayGet(pResBlock->pDataBlock, pSupInfo->slotId[i]); + colDataSetNULL(pCol, outputRowIndex); + i += 1; + } + + pResBlock->info.dataLoad = 1; + pResBlock->info.rows += 1; + return TSDB_CODE_SUCCESS; +} + +int32_t buildDataBlockFromBufImpl(STableBlockScanInfo* pBlockScanInfo, int64_t endKey, int32_t capacity, + STsdbReader* pReader) { + SSDataBlock* pBlock = pReader->resBlockInfo.pResBlock; + int32_t code = TSDB_CODE_SUCCESS; + + do { + // SRow* pTSRow = NULL; + TSDBROW row = {.type = -1}; + bool freeTSRow = false; + tsdbGetNextRowInMem(pBlockScanInfo, pReader, &row, endKey, &freeTSRow); + if (row.type == 
-1) { + break; + } + + if (row.type == TSDBROW_ROW_FMT) { + code = doAppendRowFromTSRow(pBlock, pReader, row.pTSRow, pBlockScanInfo); + + if (freeTSRow) { + taosMemoryFree(row.pTSRow); + } + + if (code) { + return code; + } + } else { + code = doAppendRowFromFileBlock(pBlock, pReader, row.pBlockData, row.iRow); + if (code) { + break; + } + } + + // no data in buffer, return immediately + if (!(pBlockScanInfo->iter.hasVal || pBlockScanInfo->iiter.hasVal)) { + break; + } + + if (pBlock->info.rows >= capacity) { + break; + } + } while (1); + + return code; +} + +// TODO refactor: with createDataBlockScanInfo +int32_t tsdbSetTableList2(STsdbReader* pReader, const void* pTableList, int32_t num) { + int32_t size = tSimpleHashGetSize(pReader->status.pTableMap); + + STableBlockScanInfo** p = NULL; + int32_t iter = 0; + + while ((p = tSimpleHashIterate(pReader->status.pTableMap, p, &iter)) != NULL) { + clearBlockScanInfo(*p); + } + + if (size < num) { + int32_t code = ensureBlockScanInfoBuf(&pReader->blockInfoBuf, num); + if (code) { + return code; + } + + char* p1 = taosMemoryRealloc(pReader->status.uidList.tableUidList, sizeof(uint64_t) * num); + if (p1 == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + pReader->status.uidList.tableUidList = (uint64_t*)p1; + } + + tSimpleHashClear(pReader->status.pTableMap); + STableUidList* pUidList = &pReader->status.uidList; + pUidList->currentIndex = 0; + + STableKeyInfo* pList = (STableKeyInfo*)pTableList; + for (int32_t i = 0; i < num; ++i) { + STableBlockScanInfo* pInfo = getPosInBlockInfoBuf(&pReader->blockInfoBuf, i); + pInfo->uid = pList[i].uid; + pUidList->tableUidList[i] = pList[i].uid; + + // todo extract method + if (ASCENDING_TRAVERSE(pReader->info.order)) { + int64_t skey = pReader->info.window.skey; + pInfo->lastKey = (skey > INT64_MIN) ? (skey - 1) : skey; + pInfo->lastKeyInStt = skey; + } else { + int64_t ekey = pReader->info.window.ekey; + pInfo->lastKey = (ekey < INT64_MAX) ? 
(ekey + 1) : ekey; + pInfo->lastKeyInStt = ekey; + } + + tSimpleHashPut(pReader->status.pTableMap, &pInfo->uid, sizeof(uint64_t), &pInfo, POINTER_BYTES); + } + + return TDB_CODE_SUCCESS; +} + +void* tsdbGetIdx2(SMeta* pMeta) { + if (pMeta == NULL) { + return NULL; + } + return metaGetIdx(pMeta); +} + +void* tsdbGetIvtIdx2(SMeta* pMeta) { + if (pMeta == NULL) { + return NULL; + } + return metaGetIvtIdx(pMeta); +} + +uint64_t tsdbGetReaderMaxVersion2(STsdbReader* pReader) { return pReader->info.verRange.maxVer; } + +static int32_t doOpenReaderImpl(STsdbReader* pReader) { + SReaderStatus* pStatus = &pReader->status; + SDataBlockIter* pBlockIter = &pStatus->blockIter; + + initFilesetIterator(&pStatus->fileIter, pReader->pReadSnap->pfSetArray, pReader); + resetDataBlockIterator(&pStatus->blockIter, pReader->info.order); + + int32_t code = TSDB_CODE_SUCCESS; + if (pStatus->fileIter.numOfFiles == 0) { + pStatus->loadFromFile = false; + } else if (READ_MODE_COUNT_ONLY == pReader->info.readMode) { + // DO NOTHING + } else { + code = initForFirstBlockInFile(pReader, pBlockIter); + } + + if (!pStatus->loadFromFile) { + resetTableListIndex(pStatus); + } + + return code; +} + +static void freeSchemaFunc(void* param) { + void** p = (void**)param; + taosMemoryFreeClear(*p); +} + +static void clearSharedPtr(STsdbReader* p) { + p->status.pTableMap = NULL; + p->status.uidList.tableUidList = NULL; + p->info.pSchema = NULL; + p->pReadSnap = NULL; + p->pSchemaMap = NULL; +} + +static void setSharedPtr(STsdbReader* pDst, const STsdbReader* pSrc) { + pDst->status.pTableMap = pSrc->status.pTableMap; + pDst->status.uidList = pSrc->status.uidList; + pDst->info.pSchema = pSrc->info.pSchema; + pDst->pSchemaMap = pSrc->pSchemaMap; + pDst->pReadSnap = pSrc->pReadSnap; + pDst->pReadSnap->pfSetArray = pSrc->pReadSnap->pfSetArray; + + if (pDst->info.pSchema) { + tsdbRowMergerInit(&pDst->status.merger, pDst->info.pSchema); + } +} + +// ====================================== EXPOSED APIs 
====================================== +int32_t tsdbReaderOpen2(void* pVnode, SQueryTableDataCond* pCond, void* pTableList, int32_t numOfTables, + SSDataBlock* pResBlock, void** ppReader, const char* idstr, bool countOnly, + SHashObj** pIgnoreTables) { + STimeWindow window = pCond->twindows; + SVnodeCfg* pConf = &(((SVnode*)pVnode)->config); + + int32_t capacity = pConf->tsdbCfg.maxRows; + if (pResBlock != NULL) { + blockDataEnsureCapacity(pResBlock, capacity); + } + + int32_t code = tsdbReaderCreate(pVnode, pCond, ppReader, capacity, pResBlock, idstr); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } + + // check for query time window + STsdbReader* pReader = *ppReader; + if (isEmptyQueryTimeWindow(&pReader->info.window) && pCond->type == TIMEWINDOW_RANGE_CONTAINED) { + tsdbDebug("%p query window not overlaps with the data set, no result returned, %s", pReader, pReader->idStr); + return TSDB_CODE_SUCCESS; + } + + if (pCond->type == TIMEWINDOW_RANGE_EXTERNAL) { + // update the SQueryTableDataCond to create inner reader + int32_t order = pCond->order; + if (order == TSDB_ORDER_ASC) { + pCond->twindows.ekey = window.skey - 1; + pCond->twindows.skey = INT64_MIN; + pCond->order = TSDB_ORDER_DESC; + } else { + pCond->twindows.skey = window.ekey + 1; + pCond->twindows.ekey = INT64_MAX; + pCond->order = TSDB_ORDER_ASC; + } + + // here we only need one more row, so the capacity is set to be ONE. 
+ code = tsdbReaderCreate(pVnode, pCond, (void**)&((STsdbReader*)pReader)->innerReader[0], 1, pResBlock, idstr); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } + + if (order == TSDB_ORDER_ASC) { + pCond->twindows.skey = window.ekey + 1; + pCond->twindows.ekey = INT64_MAX; + } else { + pCond->twindows.skey = INT64_MIN; + pCond->twindows.ekey = window.ekey - 1; + } + pCond->order = order; + + code = tsdbReaderCreate(pVnode, pCond, (void**)&((STsdbReader*)pReader)->innerReader[1], 1, pResBlock, idstr); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } + } + + // NOTE: the endVersion in pCond is the data version not schema version, so pCond->endVersion is not correct here. + // no valid error code set in metaGetTbTSchema, so let's set the error code here. + // we should proceed in case of tmq processing. + if (pCond->suid != 0) { + pReader->info.pSchema = metaGetTbTSchema(pReader->pTsdb->pVnode->pMeta, pReader->info.suid, -1, 1); + if (pReader->info.pSchema == NULL) { + tsdbError("failed to get table schema, suid:%" PRIu64 ", ver:-1, %s", pReader->info.suid, pReader->idStr); + } + } else if (numOfTables > 0) { + STableKeyInfo* pKey = pTableList; + pReader->info.pSchema = metaGetTbTSchema(pReader->pTsdb->pVnode->pMeta, pKey->uid, -1, 1); + if (pReader->info.pSchema == NULL) { + tsdbError("failed to get table schema, uid:%" PRIu64 ", ver:-1, %s", pKey->uid, pReader->idStr); + } + } + + if (pReader->info.pSchema != NULL) { + tsdbRowMergerInit(&pReader->status.merger, pReader->info.pSchema); + } + + pReader->pSchemaMap = tSimpleHashInit(8, taosFastHash); + if (pReader->pSchemaMap == NULL) { + tsdbError("failed init schema hash for reader %s", pReader->idStr); + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + + tSimpleHashSetFreeFp(pReader->pSchemaMap, freeSchemaFunc); + if (pReader->info.pSchema != NULL) { + code = updateBlockSMAInfo(pReader->info.pSchema, &pReader->suppInfo); + if (code != TSDB_CODE_SUCCESS) { + goto _err; + } + } + + STsdbReader* p = 
(pReader->innerReader[0] != NULL) ? pReader->innerReader[0] : pReader; + pReader->status.pTableMap = + createDataBlockScanInfo(p, &pReader->blockInfoBuf, pTableList, &pReader->status.uidList, numOfTables); + if (pReader->status.pTableMap == NULL) { + *ppReader = NULL; + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + + pReader->status.pLDataIterArray = taosArrayInit(4, POINTER_BYTES); + if (pReader->status.pLDataIterArray == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + + pReader->flag = READER_STATUS_SUSPEND; + + if (countOnly) { + pReader->info.readMode = READ_MODE_COUNT_ONLY; + } + + pReader->pIgnoreTables = pIgnoreTables; + + tsdbDebug("%p total numOfTable:%d, window:%" PRId64 " - %" PRId64 ", verRange:%" PRId64 " - %" PRId64 + " in this query %s", + pReader, numOfTables, pReader->info.window.skey, pReader->info.window.ekey, pReader->info.verRange.minVer, + pReader->info.verRange.maxVer, pReader->idStr); + + return code; + +_err: + tsdbError("failed to create data reader, code:%s %s", tstrerror(code), idstr); + tsdbReaderClose2(*ppReader); + *ppReader = NULL; // reset the pointer value. 
+ return code; +} + +void tsdbReaderClose2(STsdbReader* pReader) { + if (pReader == NULL) { + return; + } + + tsdbAcquireReader(pReader); + + { + if (pReader->innerReader[0] != NULL || pReader->innerReader[1] != NULL) { + STsdbReader* p = pReader->innerReader[0]; + clearSharedPtr(p); + + p = pReader->innerReader[1]; + clearSharedPtr(p); + + tsdbReaderClose2(pReader->innerReader[0]); + tsdbReaderClose2(pReader->innerReader[1]); + } + } + + SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo; + TARRAY2_DESTROY(&pSupInfo->colAggArray, NULL); + for (int32_t i = 0; i < pSupInfo->numOfCols; ++i) { + if (pSupInfo->buildBuf[i] != NULL) { + taosMemoryFreeClear(pSupInfo->buildBuf[i]); + } + } + + if (pReader->resBlockInfo.freeBlock) { + pReader->resBlockInfo.pResBlock = blockDataDestroy(pReader->resBlockInfo.pResBlock); + } + + taosMemoryFree(pSupInfo->colId); + tBlockDataDestroy(&pReader->status.fileBlockData); + cleanupDataBlockIterator(&pReader->status.blockIter); + + size_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap); + if (pReader->status.pTableMap != NULL) { + destroyAllBlockScanInfo(pReader->status.pTableMap); + clearBlockScanInfoBuf(&pReader->blockInfoBuf); + pReader->status.pTableMap = NULL; + } + + if (pReader->pFileReader != NULL) { + tsdbDataFileReaderClose(&pReader->pFileReader); + } + + qTrace("tsdb/reader-close: %p, untake snapshot", pReader); + tsdbUntakeReadSnap2(pReader, pReader->pReadSnap, true); + pReader->pReadSnap = NULL; + + tsdbReleaseReader(pReader); + tsdbUninitReaderLock(pReader); + + SCostSummary* pCost = &pReader->cost; + SFilesetIter* pFilesetIter = &pReader->status.fileIter; + if (pFilesetIter->pLastBlockReader != NULL) { + SLastBlockReader* pLReader = pFilesetIter->pLastBlockReader; + tMergeTreeClose(&pLReader->mergeTree); + taosMemoryFree(pLReader); + } + + destroySttBlockReader(pReader->status.pLDataIterArray, &pCost->lastBlockLoad, &pCost->lastBlockLoadTime); + taosMemoryFreeClear(pReader->status.uidList.tableUidList); + + 
tsdbDebug( + "%p :io-cost summary: head-file:%" PRIu64 ", head-file time:%.2f ms, SMA:%" PRId64 + " SMA-time:%.2f ms, fileBlocks:%" PRId64 + ", fileBlocks-load-time:%.2f ms, " + "build in-memory-block-time:%.2f ms, lastBlocks:%" PRId64 ", lastBlocks-time:%.2f ms, composed-blocks:%" PRId64 + ", composed-blocks-time:%.2fms, STableBlockScanInfo size:%.2f Kb, createTime:%.2f ms,createSkylineIterTime:%.2f " + "ms, initLastBlockReader:%.2fms, %s", + pReader, pCost->headFileLoad, pCost->headFileLoadTime, pCost->smaDataLoad, pCost->smaLoadTime, pCost->numOfBlocks, + pCost->blockLoadTime, pCost->buildmemBlock, pCost->lastBlockLoad, pCost->lastBlockLoadTime, pCost->composedBlocks, + pCost->buildComposedBlockTime, numOfTables * sizeof(STableBlockScanInfo) / 1000.0, pCost->createScanInfoList, + pCost->createSkylineIterTime, pCost->initLastBlockReader, pReader->idStr); + + taosMemoryFree(pReader->idStr); + + tsdbRowMergerCleanup(&pReader->status.merger); + taosMemoryFree(pReader->info.pSchema); + + tSimpleHashCleanup(pReader->pSchemaMap); + taosMemoryFreeClear(pReader); +} + +int32_t tsdbReaderSuspend2(STsdbReader* pReader) { + int32_t code = 0; + + // save reader's base state & reset top state to be reconstructed from base state + SReaderStatus* pStatus = &pReader->status; + STableBlockScanInfo* pBlockScanInfo = NULL; + + if (pStatus->loadFromFile) { + SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter); + if (pBlockInfo != NULL) { + pBlockScanInfo = getTableBlockScanInfo(pStatus->pTableMap, pBlockInfo->uid, pReader->idStr); + if (pBlockScanInfo == NULL) { + goto _err; + } + } else { + pBlockScanInfo = *pStatus->pTableIter; + } + + tsdbDataFileReaderClose(&pReader->pFileReader); + + int64_t loadBlocks = 0; + double elapse = 0; + pReader->status.pLDataIterArray = destroySttBlockReader(pReader->status.pLDataIterArray, &loadBlocks, &elapse); + pReader->status.pLDataIterArray = taosArrayInit(4, POINTER_BYTES); + + // resetDataBlockScanInfo excluding 
lastKey + STableBlockScanInfo** p = NULL; + int32_t iter = 0; + + while ((p = tSimpleHashIterate(pStatus->pTableMap, p, &iter)) != NULL) { + STableBlockScanInfo* pInfo = *(STableBlockScanInfo**)p; + + pInfo->iterInit = false; + pInfo->iter.hasVal = false; + pInfo->iiter.hasVal = false; + + if (pInfo->iter.iter != NULL) { + pInfo->iter.iter = tsdbTbDataIterDestroy(pInfo->iter.iter); + } + + if (pInfo->iiter.iter != NULL) { + pInfo->iiter.iter = tsdbTbDataIterDestroy(pInfo->iiter.iter); + } + + pInfo->delSkyline = taosArrayDestroy(pInfo->delSkyline); + pInfo->pfileDelData = taosArrayDestroy(pInfo->pfileDelData); + } + } else { + // resetDataBlockScanInfo excluding lastKey + STableBlockScanInfo** p = NULL; + int32_t iter = 0; + + while ((p = tSimpleHashIterate(pStatus->pTableMap, p, &iter)) != NULL) { + STableBlockScanInfo* pInfo = *(STableBlockScanInfo**)p; + + pInfo->iterInit = false; + pInfo->iter.hasVal = false; + pInfo->iiter.hasVal = false; + + if (pInfo->iter.iter != NULL) { + pInfo->iter.iter = tsdbTbDataIterDestroy(pInfo->iter.iter); + } + + if (pInfo->iiter.iter != NULL) { + pInfo->iiter.iter = tsdbTbDataIterDestroy(pInfo->iiter.iter); + } + + pInfo->delSkyline = taosArrayDestroy(pInfo->delSkyline); + } + + pBlockScanInfo = pStatus->pTableIter == NULL ? NULL : *pStatus->pTableIter; + if (pBlockScanInfo) { + // save lastKey to restore memory iterator + STimeWindow w = pReader->resBlockInfo.pResBlock->info.window; + pBlockScanInfo->lastKey = ASCENDING_TRAVERSE(pReader->info.order) ? 
w.ekey : w.skey; + + // reset current current table's data block scan info, + pBlockScanInfo->iterInit = false; + + pBlockScanInfo->iter.hasVal = false; + pBlockScanInfo->iiter.hasVal = false; + if (pBlockScanInfo->iter.iter != NULL) { + pBlockScanInfo->iter.iter = tsdbTbDataIterDestroy(pBlockScanInfo->iter.iter); + } + + if (pBlockScanInfo->iiter.iter != NULL) { + pBlockScanInfo->iiter.iter = tsdbTbDataIterDestroy(pBlockScanInfo->iiter.iter); + } + + pBlockScanInfo->pBlockList = taosArrayDestroy(pBlockScanInfo->pBlockList); + // TODO: keep skyline for reuse + pBlockScanInfo->delSkyline = taosArrayDestroy(pBlockScanInfo->delSkyline); + } + } + + tsdbUntakeReadSnap2(pReader, pReader->pReadSnap, false); + pReader->pReadSnap = NULL; + pReader->flag = READER_STATUS_SUSPEND; + + tsdbDebug("reader: %p suspended uid %" PRIu64 " in this query %s", pReader, pBlockScanInfo ? pBlockScanInfo->uid : 0, + pReader->idStr); + return code; + +_err: + tsdbError("failed to suspend data reader, code:%s %s", tstrerror(code), pReader->idStr); + return code; +} + +static int32_t tsdbSetQueryReseek(void* pQHandle) { + int32_t code = 0; + STsdbReader* pReader = pQHandle; + + code = tsdbTryAcquireReader(pReader); + if (code == 0) { + if (pReader->flag == READER_STATUS_SUSPEND) { + tsdbReleaseReader(pReader); + return code; + } + + tsdbReaderSuspend2(pReader); + + tsdbReleaseReader(pReader); + + return code; + } else if (code == EBUSY) { + return TSDB_CODE_VND_QUERY_BUSY; + } else { + terrno = TAOS_SYSTEM_ERROR(code); + return TSDB_CODE_FAILED; + } +} + +int32_t tsdbReaderResume2(STsdbReader* pReader) { + int32_t code = 0; + + STableBlockScanInfo** pBlockScanInfo = pReader->status.pTableIter; + + // restore reader's state + // task snapshot + int32_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap); + if (numOfTables > 0) { + qTrace("tsdb/reader: %p, take snapshot", pReader); + code = tsdbTakeReadSnap2(pReader, tsdbSetQueryReseek, &pReader->pReadSnap); + if (code != 
TSDB_CODE_SUCCESS) { + goto _err; + } + + if (pReader->type == TIMEWINDOW_RANGE_CONTAINED) { + code = doOpenReaderImpl(pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } else { + STsdbReader* pPrevReader = pReader->innerReader[0]; + STsdbReader* pNextReader = pReader->innerReader[1]; + + // we need only one row + pPrevReader->resBlockInfo.capacity = 1; + setSharedPtr(pPrevReader, pReader); + + pNextReader->resBlockInfo.capacity = 1; + setSharedPtr(pNextReader, pReader); + + code = doOpenReaderImpl(pPrevReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + } + + pReader->flag = READER_STATUS_NORMAL; + tsdbDebug("reader: %p resumed uid %" PRIu64 ", numOfTable:%" PRId32 ", in this query %s", pReader, + pBlockScanInfo ? (*pBlockScanInfo)->uid : 0, numOfTables, pReader->idStr); + return code; + +_err: + tsdbError("failed to resume data reader, code:%s %s", tstrerror(code), pReader->idStr); + return code; +} + +static bool tsdbReadRowsCountOnly(STsdbReader* pReader) { + int32_t code = TSDB_CODE_SUCCESS; + SSDataBlock* pBlock = pReader->resBlockInfo.pResBlock; + + if (pReader->status.loadFromFile == false) { + return false; + } + + code = readRowsCountFromFiles(pReader); + if (code != TSDB_CODE_SUCCESS) { + return false; + } + + code = readRowsCountFromMem(pReader); + if (code != TSDB_CODE_SUCCESS) { + return false; + } + + pBlock->info.rows = pReader->rowsNum; + pBlock->info.id.uid = 0; + pBlock->info.dataLoad = 0; + + pReader->rowsNum = 0; + + return pBlock->info.rows > 0; +} + +static int32_t doTsdbNextDataBlock2(STsdbReader* pReader, bool* hasNext) { + int32_t code = TSDB_CODE_SUCCESS; + + // cleanup the data that belongs to the previous data block + SSDataBlock* pBlock = pReader->resBlockInfo.pResBlock; + blockDataCleanup(pBlock); + + *hasNext = false; + + SReaderStatus* pStatus = &pReader->status; + if (tSimpleHashGetSize(pStatus->pTableMap) == 0) { + return code; + } + + if (READ_MODE_COUNT_ONLY == pReader->info.readMode) { + return 
tsdbReadRowsCountOnly(pReader); + } + + if (pStatus->loadFromFile) { + code = buildBlockFromFiles(pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + if (pBlock->info.rows <= 0) { + resetTableListIndex(&pReader->status); + code = buildBlockFromBufferSequentially(pReader); + } + } else { // no data in files, let's try the buffer + code = buildBlockFromBufferSequentially(pReader); + } + + *hasNext = pBlock->info.rows > 0; + + return code; +} + +int32_t tsdbNextDataBlock2(STsdbReader* pReader, bool* hasNext) { + int32_t code = TSDB_CODE_SUCCESS; + + *hasNext = false; + + if (isEmptyQueryTimeWindow(&pReader->info.window) || pReader->step == EXTERNAL_ROWS_NEXT || + pReader->code != TSDB_CODE_SUCCESS) { + return (pReader->code != TSDB_CODE_SUCCESS) ? pReader->code : code; + } + + SReaderStatus* pStatus = &pReader->status; + + code = tsdbAcquireReader(pReader); + qTrace("tsdb/read: %p, take read mutex, code: %d", pReader, code); + + if (pReader->flag == READER_STATUS_SUSPEND) { + code = tsdbReaderResume2(pReader); + if (code != TSDB_CODE_SUCCESS) { + tsdbReleaseReader(pReader); + return code; + } + } + + if (pReader->innerReader[0] != NULL && pReader->step == 0) { + code = doTsdbNextDataBlock2(pReader->innerReader[0], hasNext); + if (code) { + tsdbReleaseReader(pReader); + return code; + } + + pReader->step = EXTERNAL_ROWS_PREV; + if (*hasNext) { + pStatus = &pReader->innerReader[0]->status; + if (pStatus->composedDataBlock) { + qTrace("tsdb/read: %p, unlock read mutex", pReader); + tsdbReleaseReader(pReader); + } + + return code; + } + } + + if (pReader->step == EXTERNAL_ROWS_PREV) { + // prepare for the main scan + code = doOpenReaderImpl(pReader); + int32_t step = 1; + resetAllDataBlockScanInfo(pReader->status.pTableMap, pReader->innerReader[0]->info.window.ekey, step); + + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + pReader->step = EXTERNAL_ROWS_MAIN; + } + + code = doTsdbNextDataBlock2(pReader, hasNext); + if (code != TSDB_CODE_SUCCESS) { 
+ tsdbReleaseReader(pReader); + return code; + } + + if (*hasNext) { + if (pStatus->composedDataBlock) { + qTrace("tsdb/read: %p, unlock read mutex", pReader); + tsdbReleaseReader(pReader); + } + + return code; + } + + if (pReader->step == EXTERNAL_ROWS_MAIN && pReader->innerReader[1] != NULL) { + // prepare for the next row scan + int32_t step = -1; + code = doOpenReaderImpl(pReader->innerReader[1]); + resetAllDataBlockScanInfo(pReader->innerReader[1]->status.pTableMap, pReader->info.window.ekey, step); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + code = doTsdbNextDataBlock2(pReader->innerReader[1], hasNext); + if (code != TSDB_CODE_SUCCESS) { + tsdbReleaseReader(pReader); + return code; + } + + pReader->step = EXTERNAL_ROWS_NEXT; + if (*hasNext) { + pStatus = &pReader->innerReader[1]->status; + if (pStatus->composedDataBlock) { + qTrace("tsdb/read: %p, unlock read mutex", pReader); + tsdbReleaseReader(pReader); + } + + return code; + } + } + + qTrace("tsdb/read: %p, unlock read mutex", pReader); + tsdbReleaseReader(pReader); + + return code; +} + +static void doFillNullColSMA(SBlockLoadSuppInfo* pSup, int32_t numOfRows, int32_t numOfCols, SColumnDataAgg* pTsAgg) { + // do fill all null column value SMA info + int32_t i = 0, j = 0; + int32_t size = (int32_t)TARRAY2_SIZE(&pSup->colAggArray); + TARRAY2_INSERT_PTR(&pSup->colAggArray, 0, pTsAgg); + size++; + + while (j < numOfCols && i < size) { + SColumnDataAgg* pAgg = &pSup->colAggArray.data[i]; + if (pAgg->colId == pSup->colId[j]) { + i += 1; + j += 1; + } else if (pAgg->colId < pSup->colId[j]) { + i += 1; + } else if (pSup->colId[j] < pAgg->colId) { + if (pSup->colId[j] != PRIMARYKEY_TIMESTAMP_COL_ID) { + SColumnDataAgg nullColAgg = {.colId = pSup->colId[j], .numOfNull = numOfRows}; + TARRAY2_INSERT_PTR(&pSup->colAggArray, i, &nullColAgg); + i += 1; + size++; + } + j += 1; + } + } + + while (j < numOfCols) { + if (pSup->colId[j] != PRIMARYKEY_TIMESTAMP_COL_ID) { + SColumnDataAgg nullColAgg = {.colId = 
pSup->colId[j], .numOfNull = numOfRows}; + TARRAY2_INSERT_PTR(&pSup->colAggArray, i, &nullColAgg); + i += 1; + } + j++; + } +} + +int32_t tsdbRetrieveDatablockSMA2(STsdbReader* pReader, SSDataBlock* pDataBlock, bool* allHave, bool* hasNullSMA) { + SColumnDataAgg*** pBlockSMA = &pDataBlock->pBlockAgg; + + int32_t code = 0; + *allHave = false; + *pBlockSMA = NULL; + + if (pReader->type == TIMEWINDOW_RANGE_EXTERNAL) { + return TSDB_CODE_SUCCESS; + } + + // there is no statistics data for composed block + if (pReader->status.composedDataBlock || (!pReader->suppInfo.smaValid)) { + return TSDB_CODE_SUCCESS; + } + + SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(&pReader->status.blockIter); + SBlockLoadSuppInfo* pSup = &pReader->suppInfo; + + SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; + if (pResBlock->info.id.uid != pFBlock->uid) { + return TSDB_CODE_SUCCESS; + } + + // int64_t st = taosGetTimestampUs(); + TARRAY2_CLEAR(&pSup->colAggArray, 0); + + code = tsdbDataFileReadBlockSma(pReader->pFileReader, &pFBlock->record, &pSup->colAggArray); + if (code != TSDB_CODE_SUCCESS) { + tsdbDebug("vgId:%d, failed to load block SMA for uid %" PRIu64 ", code:%s, %s", 0, pFBlock->uid, tstrerror(code), + pReader->idStr); + return code; + } + + if (pSup->colAggArray.size > 0) { + *allHave = true; + } else { + *pBlockSMA = NULL; + return TSDB_CODE_SUCCESS; + } + + // always load the first primary timestamp column data + SColumnDataAgg* pTsAgg = &pSup->tsColAgg; + + pTsAgg->numOfNull = 0; + pTsAgg->colId = PRIMARYKEY_TIMESTAMP_COL_ID; + pTsAgg->min = pResBlock->info.window.skey; + pTsAgg->max = pResBlock->info.window.ekey; + + // update the number of NULL data rows + size_t numOfCols = pSup->numOfCols; + + if (pResBlock->pBlockAgg == NULL) { + size_t num = taosArrayGetSize(pResBlock->pDataBlock); + pResBlock->pBlockAgg = taosMemoryCalloc(num, POINTER_BYTES); + } + + // do fill all null column value SMA info + doFillNullColSMA(pSup, pFBlock->record.numRow, numOfCols, 
pTsAgg); + + size_t size = pSup->colAggArray.size; + + int32_t i = 0, j = 0; + while (j < numOfCols && i < size) { + SColumnDataAgg* pAgg = &pSup->colAggArray.data[i]; + if (pAgg->colId == pSup->colId[j]) { + pResBlock->pBlockAgg[pSup->slotId[j]] = pAgg; + i += 1; + j += 1; + } else if (pAgg->colId < pSup->colId[j]) { + i += 1; + } else if (pSup->colId[j] < pAgg->colId) { + pResBlock->pBlockAgg[pSup->slotId[j]] = NULL; + *allHave = false; + j += 1; + } + } + + *pBlockSMA = pResBlock->pBlockAgg; + pReader->cost.smaDataLoad += 1; + + // double elapsedTime = (taosGetTimestampUs() - st) / 1000.0; + pReader->cost.smaLoadTime += 0; // elapsedTime; + + tsdbDebug("vgId:%d, succeed to load block SMA for uid %" PRIu64 ", %s", 0, pFBlock->uid, pReader->idStr); + return code; +} + +static SSDataBlock* doRetrieveDataBlock(STsdbReader* pReader) { + SReaderStatus* pStatus = &pReader->status; + int32_t code = TSDB_CODE_SUCCESS; + SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pStatus->blockIter); + + if (pReader->code != TSDB_CODE_SUCCESS) { + return NULL; + } + + STableBlockScanInfo* pBlockScanInfo = getTableBlockScanInfo(pStatus->pTableMap, pBlockInfo->uid, pReader->idStr); + if (pBlockScanInfo == NULL) { + return NULL; + } + + code = doLoadFileBlockData(pReader, &pStatus->blockIter, &pStatus->fileBlockData, pBlockScanInfo->uid); + if (code != TSDB_CODE_SUCCESS) { + tBlockDataDestroy(&pStatus->fileBlockData); + terrno = code; + return NULL; + } + + code = copyBlockDataToSDataBlock(pReader); + if (code != TSDB_CODE_SUCCESS) { + tBlockDataDestroy(&pStatus->fileBlockData); + terrno = code; + return NULL; + } + + return pReader->resBlockInfo.pResBlock; +} + +SSDataBlock* tsdbRetrieveDataBlock2(STsdbReader* pReader, SArray* pIdList) { + STsdbReader* pTReader = pReader; + if (pReader->type == TIMEWINDOW_RANGE_EXTERNAL) { + if (pReader->step == EXTERNAL_ROWS_PREV) { + pTReader = pReader->innerReader[0]; + } else if (pReader->step == EXTERNAL_ROWS_NEXT) { + pTReader = 
pReader->innerReader[1]; + } + } + + SReaderStatus* pStatus = &pTReader->status; + if (pStatus->composedDataBlock) { + return pTReader->resBlockInfo.pResBlock; + } + + SSDataBlock* ret = doRetrieveDataBlock(pTReader); + + qTrace("tsdb/read-retrieve: %p, unlock read mutex", pReader); + tsdbReleaseReader(pReader); + + return ret; +} + +int32_t tsdbReaderReset2(STsdbReader* pReader, SQueryTableDataCond* pCond) { + int32_t code = TSDB_CODE_SUCCESS; + + qTrace("tsdb/reader-reset: %p, take read mutex", pReader); + tsdbAcquireReader(pReader); + + if (pReader->flag == READER_STATUS_SUSPEND) { + code = tsdbReaderResume2(pReader); + if (code != TSDB_CODE_SUCCESS) { + tsdbReleaseReader(pReader); + return code; + } + } + + if (isEmptyQueryTimeWindow(&pReader->info.window) || pReader->pReadSnap == NULL) { + tsdbDebug("tsdb reader reset return %p, %s", pReader->pReadSnap, pReader->idStr); + tsdbReleaseReader(pReader); + return TSDB_CODE_SUCCESS; + } + + SReaderStatus* pStatus = &pReader->status; + SDataBlockIter* pBlockIter = &pStatus->blockIter; + + pReader->info.order = pCond->order; + pReader->type = TIMEWINDOW_RANGE_CONTAINED; + pStatus->loadFromFile = true; + pStatus->pTableIter = NULL; + pReader->info.window = updateQueryTimeWindow(pReader->pTsdb, &pCond->twindows); + + // allocate buffer in order to load data blocks from file + memset(&pReader->suppInfo.tsColAgg, 0, sizeof(SColumnDataAgg)); + + pReader->suppInfo.tsColAgg.colId = PRIMARYKEY_TIMESTAMP_COL_ID; + tsdbDataFileReaderClose(&pReader->pFileReader); + + int32_t numOfTables = tSimpleHashGetSize(pStatus->pTableMap); + + initFilesetIterator(&pStatus->fileIter, pReader->pReadSnap->pfSetArray, pReader); + resetDataBlockIterator(pBlockIter, pReader->info.order); + resetTableListIndex(&pReader->status); + + bool asc = ASCENDING_TRAVERSE(pReader->info.order); + int32_t step = asc ? 1 : -1; + int64_t ts = asc ? 
/**
 * Map a block's row count onto a histogram bucket.
 *
 * Buckets are `bucketRange` rows wide and start at `startRow`. Row counts below `startRow` fall
 * into bucket 0 and row counts beyond the last bucket are clamped into the last bucket, so the
 * result is always a valid index in [0, numOfBucket - 1].
 *
 * @param startRow    row count at which bucket 0 starts
 * @param bucketRange width of one bucket in rows
 * @param numOfRows   row count to classify
 * @param numOfBucket number of buckets in the histogram
 * @return bucket index; 0 when `bucketRange` or `numOfBucket` is not positive
 */
static int32_t getBucketIndex(int32_t startRow, int32_t bucketRange, int32_t numOfRows, int32_t numOfBucket) {
  // a non-positive range would divide by zero, a non-positive bucket count has no valid index
  if (numOfRows < startRow || bucketRange <= 0 || numOfBucket <= 0) {
    return 0;
  }

  // widen before subtracting so that a negative startRow cannot overflow int32_t
  int64_t bucketIndex = ((int64_t)numOfRows - startRow) / bucketRange;
  if (bucketIndex >= numOfBucket) {
    bucketIndex = numOfBucket - 1;
  }

  return (int32_t)bucketIndex;
}
pTableBlockInfo->defMaxRows = pc->maxRows; + + int32_t bucketRange = ceil(((double)(pc->maxRows - pc->minRows)) / numOfBuckets); + + pTableBlockInfo->numOfFiles += 1; + + int32_t numOfTables = (int32_t)tSimpleHashGetSize(pStatus->pTableMap); + + SDataBlockIter* pBlockIter = &pStatus->blockIter; + pTableBlockInfo->numOfFiles += pStatus->fileIter.numOfFiles; + + if (pBlockIter->numOfBlocks > 0) { + pTableBlockInfo->numOfBlocks += pBlockIter->numOfBlocks; + } + + pTableBlockInfo->numOfTables = numOfTables; + bool hasNext = (pBlockIter->numOfBlocks > 0); + + while (true) { + if (hasNext) { + SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(pBlockIter); + int32_t numOfRows = pBlockInfo->record.numRow; + + pTableBlockInfo->totalRows += numOfRows; + + if (numOfRows > pTableBlockInfo->maxRows) { + pTableBlockInfo->maxRows = numOfRows; + } + + if (numOfRows < pTableBlockInfo->minRows) { + pTableBlockInfo->minRows = numOfRows; + } + + if (numOfRows < defaultRows) { + pTableBlockInfo->numOfSmallBlocks += 1; + } + + pTableBlockInfo->totalSize += pBlockInfo->record.blockSize; + + int32_t bucketIndex = getBucketIndex(pTableBlockInfo->defMinRows, bucketRange, numOfRows, numOfBuckets); + pTableBlockInfo->blockRowsHisto[bucketIndex]++; + + hasNext = blockIteratorNext(&pStatus->blockIter, pReader->idStr); + } else { + code = initForFirstBlockInFile(pReader, pBlockIter); + if ((code != TSDB_CODE_SUCCESS) || (pStatus->loadFromFile == false)) { + break; + } + + pTableBlockInfo->numOfBlocks += pBlockIter->numOfBlocks; + hasNext = (pBlockIter->numOfBlocks > 0); + } + + // tsdbDebug("%p %d blocks found in file for %d table(s), fid:%d, %s", pReader, numOfBlocks, numOfTables, + // pReader->pFileGroup->fid, pReader->idStr); + } + tsdbReleaseReader(pReader); + return code; +} + +int64_t tsdbGetNumOfRowsInMemTable2(STsdbReader* pReader) { + int32_t code = TSDB_CODE_SUCCESS; + int64_t rows = 0; + + SReaderStatus* pStatus = &pReader->status; + tsdbAcquireReader(pReader); + if (pReader->flag 
== READER_STATUS_SUSPEND) { + code = tsdbReaderResume2(pReader); + if (code != TSDB_CODE_SUCCESS) { + tsdbReleaseReader(pReader); + return code; + } + } + + int32_t iter = 0; + pStatus->pTableIter = tSimpleHashIterate(pStatus->pTableMap, NULL, &iter); + + while (pStatus->pTableIter != NULL) { + STableBlockScanInfo* pBlockScanInfo = *(STableBlockScanInfo**)pStatus->pTableIter; + + STbData* d = NULL; + if (pReader->pReadSnap->pMem != NULL) { + d = tsdbGetTbDataFromMemTable(pReader->pReadSnap->pMem, pReader->info.suid, pBlockScanInfo->uid); + if (d != NULL) { + rows += tsdbGetNRowsInTbData(d); + } + } + + STbData* di = NULL; + if (pReader->pReadSnap->pIMem != NULL) { + di = tsdbGetTbDataFromMemTable(pReader->pReadSnap->pIMem, pReader->info.suid, pBlockScanInfo->uid); + if (di != NULL) { + rows += tsdbGetNRowsInTbData(di); + } + } + + // current table is exhausted, let's try the next table + pStatus->pTableIter = tSimpleHashIterate(pStatus->pTableMap, pStatus->pTableIter, &iter); + } + + tsdbReleaseReader(pReader); + + return rows; +} + +int32_t tsdbGetTableSchema2(void* pVnode, int64_t uid, STSchema** pSchema, int64_t* suid) { + SMetaReader mr = {0}; + metaReaderDoInit(&mr, ((SVnode*)pVnode)->pMeta, 0); + int32_t code = metaReaderGetTableEntryByUidCache(&mr, uid); + if (code != TSDB_CODE_SUCCESS) { + terrno = TSDB_CODE_TDB_INVALID_TABLE_ID; + metaReaderClear(&mr); + return terrno; + } + + *suid = 0; + + // only child table and ordinary table is allowed, super table is not allowed. 
+ if (mr.me.type == TSDB_CHILD_TABLE) { + tDecoderClear(&mr.coder); + *suid = mr.me.ctbEntry.suid; + code = metaReaderGetTableEntryByUidCache(&mr, *suid); + if (code != TSDB_CODE_SUCCESS) { + terrno = TSDB_CODE_TDB_INVALID_TABLE_ID; + metaReaderClear(&mr); + return terrno; + } + } else if (mr.me.type == TSDB_NORMAL_TABLE) { // do nothing + } else { + terrno = TSDB_CODE_INVALID_PARA; + metaReaderClear(&mr); + return terrno; + } + + metaReaderClear(&mr); + + // get the newest table schema version + code = metaGetTbTSchemaEx(((SVnode*)pVnode)->pMeta, *suid, uid, -1, pSchema); + return code; +} + +int32_t tsdbTakeReadSnap2(STsdbReader* pReader, _query_reseek_func_t reseek, STsdbReadSnap** ppSnap) { + int32_t code = 0; + STsdb* pTsdb = pReader->pTsdb; + SVersionRange* pRange = &pReader->info.verRange; + + // lock + taosThreadRwlockRdlock(&pTsdb->rwLock); + + // alloc + STsdbReadSnap* pSnap = (STsdbReadSnap*)taosMemoryCalloc(1, sizeof(STsdbReadSnap)); + if (pSnap == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + + // take snapshot + if (pTsdb->mem && (pRange->minVer <= pTsdb->mem->maxVer && pRange->maxVer >= pTsdb->mem->minVer)) { + pSnap->pMem = pTsdb->mem; + pSnap->pNode = taosMemoryMalloc(sizeof(*pSnap->pNode)); + if (pSnap->pNode == NULL) { + taosThreadRwlockUnlock(&pTsdb->rwLock); + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + pSnap->pNode->pQHandle = pReader; + pSnap->pNode->reseek = reseek; + + tsdbRefMemTable(pTsdb->mem, pSnap->pNode); + } + + if (pTsdb->imem && (pRange->minVer <= pTsdb->imem->maxVer && pRange->maxVer >= pTsdb->imem->minVer)) { + pSnap->pIMem = pTsdb->imem; + pSnap->pINode = taosMemoryMalloc(sizeof(*pSnap->pINode)); + if (pSnap->pINode == NULL) { + taosThreadRwlockUnlock(&pTsdb->rwLock); + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + pSnap->pINode->pQHandle = pReader; + pSnap->pINode->reseek = reseek; + + tsdbRefMemTable(pTsdb->imem, pSnap->pINode); + } + + // fs + code = tsdbFSCreateRefSnapshot(pTsdb->pFS, 
&pSnap->pfSetArray); + if (code) { + taosThreadRwlockUnlock(&pTsdb->rwLock); + goto _exit; + } + + // unlock + taosThreadRwlockUnlock(&pTsdb->rwLock); + + tsdbTrace("vgId:%d, take read snapshot", TD_VID(pTsdb->pVnode)); + +_exit: + if (code) { + *ppSnap = NULL; + if (pSnap) { + if (pSnap->pNode) taosMemoryFree(pSnap->pNode); + if (pSnap->pINode) taosMemoryFree(pSnap->pINode); + taosMemoryFree(pSnap); + } + } else { + *ppSnap = pSnap; + } + + return code; +} + +void tsdbUntakeReadSnap2(STsdbReader* pReader, STsdbReadSnap* pSnap, bool proactive) { + STsdb* pTsdb = pReader->pTsdb; + + if (pSnap) { + if (pSnap->pMem) { + tsdbUnrefMemTable(pSnap->pMem, pSnap->pNode, proactive); + } + + if (pSnap->pIMem) { + tsdbUnrefMemTable(pSnap->pIMem, pSnap->pINode, proactive); + } + + tsdbFSUnref(pTsdb, &pSnap->fs); + if (pSnap->pNode) taosMemoryFree(pSnap->pNode); + if (pSnap->pINode) taosMemoryFree(pSnap->pINode); + + tsdbFSDestroyRefSnapshot(&pSnap->pfSetArray); + + taosMemoryFree(pSnap); + } + tsdbTrace("vgId:%d, untake read snapshot", TD_VID(pTsdb->pVnode)); +} + +// if failed, do nothing +void tsdbReaderSetId2(STsdbReader* pReader, const char* idstr) { + taosMemoryFreeClear(pReader->idStr); + pReader->idStr = taosStrdup(idstr); + pReader->status.fileIter.pLastBlockReader->mergeTree.idStr = pReader->idStr; +} + +void tsdbReaderSetCloseFlag2(STsdbReader* pReader) { pReader->code = TSDB_CODE_TSC_QUERY_CANCELLED; } diff --git a/source/dnode/vnode/src/tsdb/tsdbReadUtil.c b/source/dnode/vnode/src/tsdb/tsdbReadUtil.c new file mode 100644 index 0000000000000000000000000000000000000000..d560f0d5af7a00b775cd22a3f73380dd39918c59 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbReadUtil.c @@ -0,0 +1,630 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
// Three-way comparison of two table uids, read through the pointers as uint64_t values.
// Yields -1, 0 or 1 when the first uid is smaller than, equal to or larger than the second.
int32_t uidComparFunc(const void* p1, const void* p2) {
  const uint64_t lhs = *(uint64_t*)p1;
  const uint64_t rhs = *(uint64_t*)p2;

  if (lhs < rhs) {
    return -1;
  }

  return (lhs > rhs) ? 1 : 0;
}
pBuf->pData = taosArrayInit(num + 1, POINTER_BYTES); + } + + for (int32_t i = 0; i < num; ++i) { + char* p = taosMemoryCalloc(pBuf->numPerBucket, sizeof(STableBlockScanInfo)); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + taosArrayPush(pBuf->pData, &p); + } + + if (remainder > 0) { + char* p = taosMemoryCalloc(remainder, sizeof(STableBlockScanInfo)); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + taosArrayPush(pBuf->pData, &p); + } + + pBuf->numOfTables = numOfTables; + + return TSDB_CODE_SUCCESS; +} + +void clearBlockScanInfoBuf(SBlockInfoBuf* pBuf) { + size_t num = taosArrayGetSize(pBuf->pData); + for (int32_t i = 0; i < num; ++i) { + char** p = taosArrayGet(pBuf->pData, i); + taosMemoryFree(*p); + } + + taosArrayDestroy(pBuf->pData); +} + +void* getPosInBlockInfoBuf(SBlockInfoBuf* pBuf, int32_t index) { + int32_t bucketIndex = index / pBuf->numPerBucket; + char** pBucket = taosArrayGet(pBuf->pData, bucketIndex); + return (*pBucket) + (index % pBuf->numPerBucket) * sizeof(STableBlockScanInfo); +} + +STableBlockScanInfo* getTableBlockScanInfo(SSHashObj* pTableMap, uint64_t uid, const char* id) { + STableBlockScanInfo** p = tSimpleHashGet(pTableMap, &uid, sizeof(uid)); + if (p == NULL || *p == NULL) { + terrno = TSDB_CODE_INVALID_PARA; + int32_t size = tSimpleHashGetSize(pTableMap); + tsdbError("failed to locate the uid:%" PRIu64 " in query table uid list, total tables:%d, %s", uid, size, id); + return NULL; + } + + return *p; +} + +// NOTE: speedup the whole processing by preparing the buffer for STableBlockScanInfo in batch model +SSHashObj* createDataBlockScanInfo(STsdbReader* pTsdbReader, SBlockInfoBuf* pBuf, const STableKeyInfo* idList, + STableUidList* pUidList, int32_t numOfTables) { + // allocate buffer in order to load data blocks from file + // todo use simple hash instead, optimize the memory consumption + SSHashObj* pTableMap = tSimpleHashInit(numOfTables, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT)); + if (pTableMap == 
NULL) { + return NULL; + } + + int64_t st = taosGetTimestampUs(); + initBlockScanInfoBuf(pBuf, numOfTables); + + pUidList->tableUidList = taosMemoryMalloc(numOfTables * sizeof(uint64_t)); + if (pUidList->tableUidList == NULL) { + tSimpleHashCleanup(pTableMap); + return NULL; + } + + pUidList->currentIndex = 0; + + for (int32_t j = 0; j < numOfTables; ++j) { + STableBlockScanInfo* pScanInfo = getPosInBlockInfoBuf(pBuf, j); + + pScanInfo->uid = idList[j].uid; + pUidList->tableUidList[j] = idList[j].uid; + + if (ASCENDING_TRAVERSE(pTsdbReader->info.order)) { + int64_t skey = pTsdbReader->info.window.skey; + pScanInfo->lastKey = (skey > INT64_MIN) ? (skey - 1) : skey; + pScanInfo->lastKeyInStt = skey; + } else { + int64_t ekey = pTsdbReader->info.window.ekey; + pScanInfo->lastKey = (ekey < INT64_MAX) ? (ekey + 1) : ekey; + pScanInfo->lastKeyInStt = ekey; + } + + tSimpleHashPut(pTableMap, &pScanInfo->uid, sizeof(uint64_t), &pScanInfo, POINTER_BYTES); + tsdbTrace("%p check table uid:%" PRId64 " from lastKey:%" PRId64 " %s", pTsdbReader, pScanInfo->uid, + pScanInfo->lastKey, pTsdbReader->idStr); + } + + taosSort(pUidList->tableUidList, numOfTables, sizeof(uint64_t), uidComparFunc); + + pTsdbReader->cost.createScanInfoList = (taosGetTimestampUs() - st) / 1000.0; + tsdbDebug("%p create %d tables scan-info, size:%.2f Kb, elapsed time:%.2f ms, %s", pTsdbReader, numOfTables, + (sizeof(STableBlockScanInfo) * numOfTables) / 1024.0, pTsdbReader->cost.createScanInfoList, + pTsdbReader->idStr); + + return pTableMap; +} + +void resetAllDataBlockScanInfo(SSHashObj* pTableMap, int64_t ts, int32_t step) { + void* p = NULL; + int32_t iter = 0; + + while ((p = tSimpleHashIterate(pTableMap, p, &iter)) != NULL) { + STableBlockScanInfo* pInfo = *(STableBlockScanInfo**)p; + + pInfo->iterInit = false; + pInfo->iter.hasVal = false; + pInfo->iiter.hasVal = false; + + if (pInfo->iter.iter != NULL) { + pInfo->iter.iter = tsdbTbDataIterDestroy(pInfo->iter.iter); + } + + if (pInfo->iiter.iter != 
NULL) { + pInfo->iiter.iter = tsdbTbDataIterDestroy(pInfo->iiter.iter); + } + + pInfo->delSkyline = taosArrayDestroy(pInfo->delSkyline); + pInfo->lastKey = ts; + pInfo->lastKeyInStt = ts + step; + } +} + +void clearBlockScanInfo(STableBlockScanInfo* p) { + p->iterInit = false; + p->iter.hasVal = false; + p->iiter.hasVal = false; + + if (p->iter.iter != NULL) { + p->iter.iter = tsdbTbDataIterDestroy(p->iter.iter); + } + + if (p->iiter.iter != NULL) { + p->iiter.iter = tsdbTbDataIterDestroy(p->iiter.iter); + } + + p->delSkyline = taosArrayDestroy(p->delSkyline); + p->pBlockList = taosArrayDestroy(p->pBlockList); + p->pMemDelData = taosArrayDestroy(p->pMemDelData); + p->pfileDelData = taosArrayDestroy(p->pfileDelData); +} + +void destroyAllBlockScanInfo(SSHashObj* pTableMap) { + void* p = NULL; + int32_t iter = 0; + + while ((p = tSimpleHashIterate(pTableMap, p, &iter)) != NULL) { + clearBlockScanInfo(*(STableBlockScanInfo**)p); + } + + tSimpleHashCleanup(pTableMap); +} + +static void doCleanupInfoForNextFileset(STableBlockScanInfo* pScanInfo) { + // reset the index in last block when handing a new file + taosArrayClear(pScanInfo->pBlockList); + taosArrayClear(pScanInfo->pfileDelData); // del data from each file set +} + +void cleanupInfoFoxNextFileset(SSHashObj* pTableMap) { + STableBlockScanInfo** p = NULL; + + int32_t iter = 0; + while ((p = tSimpleHashIterate(pTableMap, p, &iter)) != NULL) { + doCleanupInfoForNextFileset(*p); + } +} + +// brin records iterator +void initBrinRecordIter(SBrinRecordIter* pIter, SDataFileReader* pReader, SArray* pList) { + memset(&pIter->block, 0, sizeof(SBrinBlock)); + memset(&pIter->record, 0, sizeof(SBrinRecord)); + pIter->blockIndex = -1; + pIter->recordIndex = -1; + + pIter->pReader = pReader; + pIter->pBrinBlockList = pList; +} + +SBrinRecord* getNextBrinRecord(SBrinRecordIter* pIter) { + if (pIter->blockIndex == -1 || (pIter->recordIndex + 1) >= TARRAY2_SIZE(pIter->block.numRow)) { + pIter->blockIndex += 1; + if 
(pIter->blockIndex >= taosArrayGetSize(pIter->pBrinBlockList)) { + return NULL; + } + + pIter->pCurrentBlk = taosArrayGet(pIter->pBrinBlockList, pIter->blockIndex); + + tBrinBlockClear(&pIter->block); + tsdbDataFileReadBrinBlock(pIter->pReader, pIter->pCurrentBlk, &pIter->block); + pIter->recordIndex = -1; + } + + pIter->recordIndex += 1; + tBrinBlockGet(&pIter->block, pIter->recordIndex, &pIter->record); + return &pIter->record; +} + +void clearBrinBlockIter(SBrinRecordIter* pIter) { tBrinBlockDestroy(&pIter->block); } + +// initialize the file block access order +// sort the file blocks according to the offset of each data block in the files +static void cleanupBlockOrderSupporter(SBlockOrderSupporter* pSup) { + taosMemoryFreeClear(pSup->numOfBlocksPerTable); + taosMemoryFreeClear(pSup->indexPerTable); + + for (int32_t i = 0; i < pSup->numOfTables; ++i) { + SBlockOrderWrapper* pBlockInfo = pSup->pDataBlockInfo[i]; + taosMemoryFreeClear(pBlockInfo); + } + + taosMemoryFreeClear(pSup->pDataBlockInfo); +} + +static int32_t initBlockOrderSupporter(SBlockOrderSupporter* pSup, int32_t numOfTables) { + pSup->numOfBlocksPerTable = taosMemoryCalloc(1, sizeof(int32_t) * numOfTables); + pSup->indexPerTable = taosMemoryCalloc(1, sizeof(int32_t) * numOfTables); + pSup->pDataBlockInfo = taosMemoryCalloc(1, POINTER_BYTES * numOfTables); + + if (pSup->numOfBlocksPerTable == NULL || pSup->indexPerTable == NULL || pSup->pDataBlockInfo == NULL) { + cleanupBlockOrderSupporter(pSup); + return TSDB_CODE_OUT_OF_MEMORY; + } + + return TSDB_CODE_SUCCESS; +} + +static int32_t fileDataBlockOrderCompar(const void* pLeft, const void* pRight, void* param) { + int32_t leftIndex = *(int32_t*)pLeft; + int32_t rightIndex = *(int32_t*)pRight; + + SBlockOrderSupporter* pSupporter = (SBlockOrderSupporter*)param; + + int32_t leftTableBlockIndex = pSupporter->indexPerTable[leftIndex]; + int32_t rightTableBlockIndex = pSupporter->indexPerTable[rightIndex]; + + if (leftTableBlockIndex > 
pSupporter->numOfBlocksPerTable[leftIndex]) { + /* left block is empty */ + return 1; + } else if (rightTableBlockIndex > pSupporter->numOfBlocksPerTable[rightIndex]) { + /* right block is empty */ + return -1; + } + + SBlockOrderWrapper* pLeftBlock = &pSupporter->pDataBlockInfo[leftIndex][leftTableBlockIndex]; + SBlockOrderWrapper* pRightBlock = &pSupporter->pDataBlockInfo[rightIndex][rightTableBlockIndex]; + + return pLeftBlock->offset > pRightBlock->offset ? 1 : -1; +} + +int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIter, int32_t numOfBlocks, SArray* pTableList) { + bool asc = ASCENDING_TRAVERSE(pReader->info.order); + + SBlockOrderSupporter sup = {0}; + pBlockIter->numOfBlocks = numOfBlocks; + taosArrayClear(pBlockIter->blockList); + + pBlockIter->pTableMap = pReader->status.pTableMap; + + // access data blocks according to the offset of each block in asc/desc order. + int32_t numOfTables = taosArrayGetSize(pTableList); + + int64_t st = taosGetTimestampUs(); + int32_t code = initBlockOrderSupporter(&sup, numOfTables); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + int32_t cnt = 0; + + for (int32_t i = 0; i < numOfTables; ++i) { + STableBlockScanInfo* pTableScanInfo = taosArrayGetP(pTableList, i); + // ASSERT(pTableScanInfo->pBlockList != NULL && taosArrayGetSize(pTableScanInfo->pBlockList) > 0); + + size_t num = taosArrayGetSize(pTableScanInfo->pBlockList); + sup.numOfBlocksPerTable[sup.numOfTables] = num; + + char* buf = taosMemoryMalloc(sizeof(SBlockOrderWrapper) * num); + if (buf == NULL) { + cleanupBlockOrderSupporter(&sup); + return TSDB_CODE_OUT_OF_MEMORY; + } + + sup.pDataBlockInfo[sup.numOfTables] = (SBlockOrderWrapper*)buf; + + for (int32_t k = 0; k < num; ++k) { + SBrinRecord* pRecord = taosArrayGet(pTableScanInfo->pBlockList, k); + sup.pDataBlockInfo[sup.numOfTables][k] = + (SBlockOrderWrapper){.uid = pTableScanInfo->uid, .offset = pRecord->blockOffset, .pInfo = pTableScanInfo}; + cnt++; + } + + sup.numOfTables += 
1; + } + + if (numOfBlocks != cnt && sup.numOfTables != numOfTables) { + cleanupBlockOrderSupporter(&sup); + return TSDB_CODE_INVALID_PARA; + } + + // since there is only one table qualified, blocks are not sorted + if (sup.numOfTables == 1) { + for (int32_t i = 0; i < numOfBlocks; ++i) { + SFileDataBlockInfo blockInfo = {.uid = sup.pDataBlockInfo[0][i].uid, .tbBlockIdx = i}; + blockInfo.record = *(SBrinRecord*)taosArrayGet(sup.pDataBlockInfo[0][i].pInfo->pBlockList, i); + + taosArrayPush(pBlockIter->blockList, &blockInfo); + } + + int64_t et = taosGetTimestampUs(); + tsdbDebug("%p create blocks info struct completed for one table, %d blocks not sorted, elapsed time:%.2f ms %s", + pReader, numOfBlocks, (et - st) / 1000.0, pReader->idStr); + + pBlockIter->index = asc ? 0 : (numOfBlocks - 1); + cleanupBlockOrderSupporter(&sup); + return TSDB_CODE_SUCCESS; + } + + tsdbDebug("%p create data blocks info struct completed, %d blocks in %d tables %s", pReader, cnt, sup.numOfTables, + pReader->idStr); + + SMultiwayMergeTreeInfo* pTree = NULL; + + uint8_t ret = tMergeTreeCreate(&pTree, sup.numOfTables, &sup, fileDataBlockOrderCompar); + if (ret != TSDB_CODE_SUCCESS) { + cleanupBlockOrderSupporter(&sup); + return TSDB_CODE_OUT_OF_MEMORY; + } + + int32_t numOfTotal = 0; + while (numOfTotal < cnt) { + int32_t pos = tMergeTreeGetChosenIndex(pTree); + int32_t index = sup.indexPerTable[pos]++; + + SFileDataBlockInfo blockInfo = {.uid = sup.pDataBlockInfo[pos][index].uid, .tbBlockIdx = index}; + blockInfo.record = *(SBrinRecord*)taosArrayGet(sup.pDataBlockInfo[pos][index].pInfo->pBlockList, index); + + taosArrayPush(pBlockIter->blockList, &blockInfo); + + // set data block index overflow, in order to disable the offset comparator + if (sup.indexPerTable[pos] >= sup.numOfBlocksPerTable[pos]) { + sup.indexPerTable[pos] = sup.numOfBlocksPerTable[pos] + 1; + } + + numOfTotal += 1; + tMergeTreeAdjust(pTree, tMergeTreeGetAdjustIndex(pTree)); + } + + int64_t et = taosGetTimestampUs(); + 
tsdbDebug("%p %d data blocks access order completed, elapsed time:%.2f ms %s", pReader, numOfBlocks, + (et - st) / 1000.0, pReader->idStr); + cleanupBlockOrderSupporter(&sup); + taosMemoryFree(pTree); + + pBlockIter->index = asc ? 0 : (numOfBlocks - 1); + return TSDB_CODE_SUCCESS; +} + +bool blockIteratorNext(SDataBlockIter* pBlockIter, const char* idStr) { + bool asc = ASCENDING_TRAVERSE(pBlockIter->order); + + int32_t step = asc ? 1 : -1; + if ((pBlockIter->index >= pBlockIter->numOfBlocks - 1 && asc) || (pBlockIter->index <= 0 && (!asc))) { + return false; + } + + pBlockIter->index += step; + return true; +} + +typedef enum { + BLK_CHECK_CONTINUE = 0x1, + BLK_CHECK_QUIT = 0x2, +} ETombBlkCheckEnum; + +static int32_t doCheckTombBlock(STombBlock* pBlock, STsdbReader* pReader, int32_t numOfTables, int32_t* j, + STableBlockScanInfo** pScanInfo, ETombBlkCheckEnum* pRet) { + int32_t code = 0; + STombRecord record = {0}; + uint64_t uid = pReader->status.uidList.tableUidList[*j]; + + for (int32_t k = 0; k < TARRAY2_SIZE(pBlock->suid); ++k) { + code = tTombBlockGet(pBlock, k, &record); + if (code != TSDB_CODE_SUCCESS) { + *pRet = BLK_CHECK_QUIT; + return code; + } + + if (record.suid < pReader->info.suid) { + continue; + } + + if (record.suid > pReader->info.suid) { + *pRet = BLK_CHECK_QUIT; + return TSDB_CODE_SUCCESS; + } + + bool newTable = false; + if (uid < record.uid) { + while ((*j) < numOfTables && pReader->status.uidList.tableUidList[*j] < record.uid) { + (*j) += 1; + newTable = true; + } + + if ((*j) >= numOfTables) { + *pRet = BLK_CHECK_QUIT; + return TSDB_CODE_SUCCESS; + } + + uid = pReader->status.uidList.tableUidList[*j]; + } + + if (record.uid < uid) { + continue; + } + + ASSERT(record.suid == pReader->info.suid && uid == record.uid); + + if (newTable) { + (*pScanInfo) = getTableBlockScanInfo(pReader->status.pTableMap, uid, pReader->idStr); + if ((*pScanInfo)->pfileDelData == NULL) { + (*pScanInfo)->pfileDelData = taosArrayInit(4, sizeof(SDelData)); + } + } 
+ + if (record.version <= pReader->info.verRange.maxVer) { + SDelData delData = {.version = record.version, .sKey = record.skey, .eKey = record.ekey}; + taosArrayPush((*pScanInfo)->pfileDelData, &delData); + } + } + + *pRet = BLK_CHECK_CONTINUE; + return TSDB_CODE_SUCCESS; +} + +// load tomb data API +static int32_t doLoadTombDataFromTombBlk(const TTombBlkArray* pTombBlkArray, STsdbReader* pReader, void* pFileReader, + bool isFile) { + int32_t code = 0; + STableUidList* pList = &pReader->status.uidList; + int32_t numOfTables = tSimpleHashGetSize(pReader->status.pTableMap); + + int32_t i = 0, j = 0; + while (i < pTombBlkArray->size && j < numOfTables) { + STombBlk* pTombBlk = &pTombBlkArray->data[i]; + if (pTombBlk->maxTbid.suid < pReader->info.suid) { + i += 1; + continue; + } + + if (pTombBlk->minTbid.suid > pReader->info.suid) { + break; + } + + ASSERT(pTombBlk->minTbid.suid <= pReader->info.suid && pTombBlk->maxTbid.suid >= pReader->info.suid); + if (pTombBlk->maxTbid.suid == pReader->info.suid && pTombBlk->maxTbid.uid < pList->tableUidList[0]) { + i += 1; + continue; + } + + if (pTombBlk->minTbid.suid == pReader->info.suid && pTombBlk->minTbid.uid > pList->tableUidList[numOfTables - 1]) { + break; + } + + STombBlock block = {0}; + code = isFile ? 
tsdbDataFileReadTombBlock(pFileReader, &pTombBlkArray->data[i], &block) + : tsdbSttFileReadTombBlock(pFileReader, &pTombBlkArray->data[i], &block); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + uint64_t uid = pReader->status.uidList.tableUidList[j]; + + STableBlockScanInfo* pScanInfo = getTableBlockScanInfo(pReader->status.pTableMap, uid, pReader->idStr); + if (pScanInfo->pfileDelData == NULL) { + pScanInfo->pfileDelData = taosArrayInit(4, sizeof(SDelData)); + } + + ETombBlkCheckEnum ret = 0; + code = doCheckTombBlock(&block, pReader, numOfTables, &j, &pScanInfo, &ret); + + tTombBlockDestroy(&block); + if (code != TSDB_CODE_SUCCESS || ret == BLK_CHECK_QUIT) { + return code; + } + + i += 1; + } + + return TSDB_CODE_SUCCESS; +} + +int32_t loadDataFileTombDataForAll(STsdbReader* pReader) { + if (pReader->status.pCurrentFileset == NULL || pReader->status.pCurrentFileset->farr[3] == NULL) { + return TSDB_CODE_SUCCESS; + } + + const TTombBlkArray* pBlkArray = NULL; + + int32_t code = tsdbDataFileReadTombBlk(pReader->pFileReader, &pBlkArray); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + return doLoadTombDataFromTombBlk(pBlkArray, pReader, pReader->pFileReader, true); +} + +int32_t loadSttTombDataForAll(STsdbReader* pReader, SSttFileReader* pSttFileReader, SSttBlockLoadInfo* pLoadInfo) { + const TTombBlkArray* pBlkArray = NULL; + int32_t code = tsdbSttFileReadTombBlk(pSttFileReader, &pBlkArray); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + return doLoadTombDataFromTombBlk(pBlkArray, pReader, pSttFileReader, false); +} + +void loadMemTombData(SArray** ppMemDelData, STbData* pMemTbData, STbData* piMemTbData, int64_t ver) { + if (*ppMemDelData == NULL) { + *ppMemDelData = taosArrayInit(4, sizeof(SDelData)); + } + + SArray* pMemDelData = *ppMemDelData; + + SDelData* p = NULL; + if (pMemTbData != NULL) { + p = pMemTbData->pHead; + while (p) { + if (p->version <= ver) { + taosArrayPush(pMemDelData, p); + } + + p = p->pNext; + } + } + + if 
(piMemTbData != NULL) { + p = piMemTbData->pHead; + while (p) { + if (p->version <= ver) { + taosArrayPush(pMemDelData, p); + } + p = p->pNext; + } + } +} diff --git a/source/dnode/vnode/src/tsdb/tsdbReadUtil.h b/source/dnode/vnode/src/tsdb/tsdbReadUtil.h new file mode 100644 index 0000000000000000000000000000000000000000..e7a1d6b038bb3c3f6fb629aed9507d2a87123160 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbReadUtil.h @@ -0,0 +1,295 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#ifndef TDENGINE_TSDBREADUTIL_H +#define TDENGINE_TSDBREADUTIL_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "tsdbDataFileRW.h" +#include "tsdbUtil2.h" + +#define ASCENDING_TRAVERSE(o) (o == TSDB_ORDER_ASC) + +typedef enum { + READER_STATUS_SUSPEND = 0x1, + READER_STATUS_NORMAL = 0x2, +} EReaderStatus; + +typedef enum { + EXTERNAL_ROWS_PREV = 0x1, + EXTERNAL_ROWS_MAIN = 0x2, + EXTERNAL_ROWS_NEXT = 0x3, +} EContentData; + +typedef struct STsdbReaderInfo { + uint64_t suid; + STSchema* pSchema; + EReadMode readMode; + uint64_t rowsNum; + STimeWindow window; + SVersionRange verRange; + int16_t order; +} STsdbReaderInfo; + +typedef struct SBlockInfoBuf { + int32_t currentIndex; + SArray* pData; + int32_t numPerBucket; + int32_t numOfTables; +} SBlockInfoBuf; + +typedef struct { + STbDataIter* iter; + int32_t index; + bool hasVal; +} SIterInfo; + +typedef struct STableBlockScanInfo { + uint64_t uid; + TSKEY lastKey; + TSKEY lastKeyInStt; // last accessed key in stt + SArray* pBlockList; // block data index list, SArray + SArray* pMemDelData; // SArray + SArray* pfileDelData; // SArray from each file set + SIterInfo iter; // mem buffer skip list iterator + SIterInfo iiter; // imem buffer skip list iterator + SArray* delSkyline; // delete info for this table + int32_t fileDelIndex; // file block delete index + int32_t lastBlockDelIndex; // delete index for last block + bool iterInit; // whether to initialize the in-memory skip list iterator or not +} STableBlockScanInfo; + +typedef struct SResultBlockInfo { + SSDataBlock* pResBlock; + bool freeBlock; + int64_t capacity; +} SResultBlockInfo; + +typedef struct SCostSummary { + int64_t numOfBlocks; + double blockLoadTime; + double buildmemBlock; + int64_t headFileLoad; + double headFileLoadTime; + int64_t smaDataLoad; + double smaLoadTime; + int64_t lastBlockLoad; + double lastBlockLoadTime; + int64_t composedBlocks; + double buildComposedBlockTime; + double createScanInfoList; + double 
createSkylineIterTime; + double initLastBlockReader; +} SCostSummary; + +typedef struct STableUidList { + uint64_t* tableUidList; // access table uid list in uid ascending order list + int32_t currentIndex; // index in table uid list +} STableUidList; + +typedef struct { + int32_t numOfBlocks; + int32_t numOfLastFiles; +} SBlockNumber; + +typedef struct SBlockIndex { + int32_t ordinalIndex; + int64_t inFileOffset; + STimeWindow window; // todo replace it with overlap flag. +} SBlockIndex; + +typedef struct SBlockOrderWrapper { + int64_t uid; + int64_t offset; + STableBlockScanInfo* pInfo; +} SBlockOrderWrapper; + +typedef struct SBlockOrderSupporter { + SBlockOrderWrapper** pDataBlockInfo; + int32_t* indexPerTable; + int32_t* numOfBlocksPerTable; + int32_t numOfTables; +} SBlockOrderSupporter; + +typedef struct SBlockLoadSuppInfo { + TColumnDataAggArray colAggArray; + SColumnDataAgg tsColAgg; + int16_t* colId; + int16_t* slotId; + int32_t numOfCols; + char** buildBuf; // build string tmp buffer, todo remove it later after all string format being updated. 
+ bool smaValid; // the sma on all queried columns are activated +} SBlockLoadSuppInfo; + +typedef struct SLastBlockReader { + STimeWindow window; + SVersionRange verRange; + int32_t order; + uint64_t uid; + SMergeTree mergeTree; + SSttBlockLoadInfo* pInfo; + int64_t currentKey; +} SLastBlockReader; + +typedef struct SFilesetIter { + int32_t numOfFiles; // number of total files + int32_t index; // current accessed index in the list + TFileSetArray* pFilesetList; // data file set list + int32_t order; + SLastBlockReader* pLastBlockReader; // last file block reader +} SFilesetIter; + +typedef struct SFileDataBlockInfo { + // index position in STableBlockScanInfo in order to check whether neighbor block overlaps with it + uint64_t uid; + int32_t tbBlockIdx; + SBrinRecord record; +} SFileDataBlockInfo; + +typedef struct SDataBlockIter { + int32_t numOfBlocks; + int32_t index; + SArray* blockList; // SArray + int32_t order; + SDataBlk block; // current SDataBlk data + SSHashObj* pTableMap; +} SDataBlockIter; + +typedef struct SFileBlockDumpInfo { + int32_t totalRows; + int32_t rowIndex; + int64_t lastKey; + bool allDumped; +} SFileBlockDumpInfo; + +typedef struct SReaderStatus { + bool loadFromFile; // check file stage + bool composedDataBlock; // the returned data block is a composed block or not + SSHashObj* pTableMap; // SHash + STableBlockScanInfo** pTableIter; // table iterator used in building in-memory buffer data blocks. + STableUidList uidList; // check tables in uid order, to avoid the repeatly load of blocks in STT. 
+ SFileBlockDumpInfo fBlockDumpInfo; + STFileSet* pCurrentFileset; // current opened file set + SBlockData fileBlockData; + SFilesetIter fileIter; + SDataBlockIter blockIter; + SArray* pLDataIterArray; + SRowMerger merger; + SColumnInfoData* pPrimaryTsCol; // primary time stamp output col info data +} SReaderStatus; + +struct STsdbReader { + STsdb* pTsdb; + STsdbReaderInfo info; + TdThreadMutex readerMutex; + EReaderStatus flag; + int32_t code; + uint64_t rowsNum; + SResultBlockInfo resBlockInfo; + SReaderStatus status; + char* idStr; // query info handle, for debug purpose + int32_t type; // query type: 1. retrieve all data blocks, 2. retrieve direct prev|next rows + SBlockLoadSuppInfo suppInfo; + STsdbReadSnap* pReadSnap; + SCostSummary cost; + SHashObj** pIgnoreTables; + SSHashObj* pSchemaMap; // keep the retrieved schema info, to avoid the overhead by repeatly load schema + SDataFileReader* pFileReader; // the file reader + SBlockInfoBuf blockInfoBuf; + EContentData step; + STsdbReader* innerReader[2]; +}; + +typedef struct SBrinRecordIter { + SArray* pBrinBlockList; + SBrinBlk* pCurrentBlk; + int32_t blockIndex; + int32_t recordIndex; + SDataFileReader* pReader; + SBrinBlock block; + SBrinRecord record; +} SBrinRecordIter; + +int32_t uidComparFunc(const void* p1, const void* p2); + +STableBlockScanInfo* getTableBlockScanInfo(SSHashObj* pTableMap, uint64_t uid, const char* id); + +SSHashObj* createDataBlockScanInfo(STsdbReader* pTsdbReader, SBlockInfoBuf* pBuf, const STableKeyInfo* idList, + STableUidList* pUidList, int32_t numOfTables); +void clearBlockScanInfo(STableBlockScanInfo* p); +void destroyAllBlockScanInfo(SSHashObj* pTableMap); +void resetAllDataBlockScanInfo(SSHashObj* pTableMap, int64_t ts, int32_t step); +void cleanupInfoFoxNextFileset(SSHashObj* pTableMap); +int32_t ensureBlockScanInfoBuf(SBlockInfoBuf* pBuf, int32_t numOfTables); +void clearBlockScanInfoBuf(SBlockInfoBuf* pBuf); +void* getPosInBlockInfoBuf(SBlockInfoBuf* pBuf, int32_t index); + 
+// brin records iterator +void initBrinRecordIter(SBrinRecordIter* pIter, SDataFileReader* pReader, SArray* pList); +SBrinRecord* getNextBrinRecord(SBrinRecordIter* pIter); +void clearBrinBlockIter(SBrinRecordIter* pIter); + +// initialize block iterator API +int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIter, int32_t numOfBlocks, SArray* pTableList); +bool blockIteratorNext(SDataBlockIter* pBlockIter, const char* idStr); + +// load tomb data API (stt/mem only for one table each, tomb data from data files are load for all tables at one time) +void loadMemTombData(SArray** ppMemDelData, STbData* pMemTbData, STbData* piMemTbData, int64_t ver); +int32_t loadDataFileTombDataForAll(STsdbReader* pReader); +int32_t loadSttTombDataForAll(STsdbReader* pReader, SSttFileReader* pSttFileReader, SSttBlockLoadInfo* pLoadInfo); + +typedef struct { + SArray* pTombData; +} STableLoadInfo; + +struct SDataFileReader; + +typedef struct SCacheRowsReader { + STsdb* pTsdb; + STsdbReaderInfo info; + TdThreadMutex readerMutex; + SVnode* pVnode; + STSchema* pSchema; + STSchema* pCurrSchema; + uint64_t uid; + char** transferBuf; // todo remove it soon + int32_t numOfCols; + SArray* pCidList; + int32_t* pSlotIds; + int32_t type; + int32_t tableIndex; // currently returned result tables + STableKeyInfo* pTableList; // table id list + int32_t numOfTables; + uint64_t* uidList; + SSHashObj* pTableMap; + SArray* pLDataIterArray; + struct SDataFileReader* pFileReader; + STFileSet* pCurFileSet; + const TBrinBlkArray* pBlkArray; + STsdbReadSnap* pReadSnap; + char* idstr; + int64_t lastTs; +} SCacheRowsReader; + +int32_t tsdbCacheGetBatch(STsdb* pTsdb, tb_uid_t uid, SArray* pLastArray, SCacheRowsReader* pr, int8_t ltype); + +#ifdef __cplusplus +} +#endif + +#endif // TDENGINE_TSDBREADUTIL_H diff --git a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c index 
4b677533e73983d4da4ef44c230d59f2a8e4d847..89b7d019ae194f11c33151555f1a52c872f7fe5e 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c +++ b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c @@ -16,7 +16,7 @@ #include "tsdb.h" // =============== PAGE-WISE FILE =============== -static int32_t tsdbOpenFile(const char *path, int32_t szPage, int32_t flag, STsdbFD **ppFD) { +int32_t tsdbOpenFile(const char *path, int32_t szPage, int32_t flag, STsdbFD **ppFD) { int32_t code = 0; STsdbFD *pFD = NULL; @@ -68,7 +68,7 @@ _exit: return code; } -static void tsdbCloseFile(STsdbFD **ppFD) { +void tsdbCloseFile(STsdbFD **ppFD) { STsdbFD *pFD = *ppFD; if (pFD) { taosMemoryFree(pFD->pBuf); @@ -141,7 +141,7 @@ _exit: return code; } -static int32_t tsdbWriteFile(STsdbFD *pFD, int64_t offset, const uint8_t *pBuf, int64_t size) { +int32_t tsdbWriteFile(STsdbFD *pFD, int64_t offset, const uint8_t *pBuf, int64_t size) { int32_t code = 0; int64_t fOffset = LOGIC_TO_FILE_OFFSET(offset, pFD->szPage); int64_t pgno = OFFSET_PGNO(fOffset, pFD->szPage); @@ -173,7 +173,7 @@ _exit: return code; } -static int32_t tsdbReadFile(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64_t size) { +int32_t tsdbReadFile(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64_t size) { int32_t code = 0; int64_t n = 0; int64_t fOffset = LOGIC_TO_FILE_OFFSET(offset, pFD->szPage); @@ -202,7 +202,7 @@ _exit: return code; } -static int32_t tsdbFsyncFile(STsdbFD *pFD) { +int32_t tsdbFsyncFile(STsdbFD *pFD) { int32_t code = 0; code = tsdbWriteFilePage(pFD); @@ -749,7 +749,7 @@ int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) { int64_t size; TdFilePtr pOutFD = NULL; TdFilePtr PInFD = NULL; - int32_t szPage = pTsdb->pVnode->config.szPage; + int32_t szPage = pTsdb->pVnode->config.tsdbPageSize; char fNameFrom[TSDB_FILENAME_LEN]; char fNameTo[TSDB_FILENAME_LEN]; @@ -1489,7 +1489,7 @@ int32_t tsdbDelFReaderClose(SDelFReader **ppReader) { } int32_t tsdbReadDelData(SDelFReader *pReader, SDelIdx 
*pDelIdx, SArray *aDelData) { - return tsdbReadDelDatav1(pReader, pDelIdx, aDelData, INT64_MAX); + return tsdbReadDelDatav1(pReader, pDelIdx, aDelData, INT64_MAX); } int32_t tsdbReadDelDatav1(SDelFReader *pReader, SDelIdx *pDelIdx, SArray *aDelData, int64_t maxVer) { @@ -1517,10 +1517,10 @@ int32_t tsdbReadDelDatav1(SDelFReader *pReader, SDelIdx *pDelIdx, SArray *aDelDa if (delData.version > maxVer) { continue; } - if (taosArrayPush(aDelData, &delData) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } + if (taosArrayPush(aDelData, &delData) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } } ASSERT(n == size); diff --git a/source/dnode/vnode/src/tsdb/tsdbRetention.c b/source/dnode/vnode/src/tsdb/tsdbRetention.c index 7c7e1bd0f79254197e1c99dd5ca0544770656cea..a4d5715083a2b6d82f048b8950c2a74c8df11f6d 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRetention.c +++ b/source/dnode/vnode/src/tsdb/tsdbRetention.c @@ -14,101 +14,271 @@ */ #include "tsdb.h" +#include "tsdbFS2.h" -static bool tsdbShouldDoRetentionImpl(STsdb *pTsdb, int64_t now) { - for (int32_t iSet = 0; iSet < taosArrayGetSize(pTsdb->fs.aDFileSet); iSet++) { - SDFileSet *pSet = (SDFileSet *)taosArrayGet(pTsdb->fs.aDFileSet, iSet); - int32_t expLevel = tsdbFidLevel(pSet->fid, &pTsdb->keepCfg, now); - SDiskID did; +typedef struct { + STsdb *tsdb; + int32_t szPage; + int64_t now; + int64_t cid; - if (expLevel == pSet->diskId.level) continue; + TFileSetArray *fsetArr; + TFileOpArray fopArr[1]; - if (expLevel < 0) { - return true; - } else { - if (tfsAllocDisk(pTsdb->pVnode->pTfs, expLevel, &did) < 0) { - return false; - } + struct { + int32_t fsetArrIdx; + STFileSet *fset; + } ctx[1]; +} SRTNer; - if (did.level == pSet->diskId.level) continue; +static int32_t tsdbDoRemoveFileObject(SRTNer *rtner, const STFileObj *fobj) { + STFileOp op = { + .optype = TSDB_FOP_REMOVE, + .fid = fobj->f->fid, + .of = fobj->f[0], + }; - return true; - } + return TARRAY2_APPEND(rtner->fopArr, op); +} + +static 
int32_t tsdbDoCopyFile(SRTNer *rtner, const STFileObj *from, const STFile *to) { + int32_t code = 0; + int32_t lino = 0; + + char fname[TSDB_FILENAME_LEN]; + TdFilePtr fdFrom = NULL; + TdFilePtr fdTo = NULL; + + tsdbTFileName(rtner->tsdb, to, fname); + + fdFrom = taosOpenFile(from->fname, TD_FILE_READ); + if (fdFrom == NULL) code = terrno; + TSDB_CHECK_CODE(code, lino, _exit); + + fdTo = taosOpenFile(fname, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); + if (fdTo == NULL) code = terrno; + TSDB_CHECK_CODE(code, lino, _exit); + + int64_t n = taosFSendFile(fdTo, fdFrom, 0, tsdbLogicToFileSize(from->f->size, rtner->szPage)); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + TSDB_CHECK_CODE(code, lino, _exit); } + taosCloseFile(&fdFrom); + taosCloseFile(&fdTo); - return false; +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code); + taosCloseFile(&fdFrom); + taosCloseFile(&fdTo); + } + return code; } -bool tsdbShouldDoRetention(STsdb *pTsdb, int64_t now) { - bool should; - taosThreadRwlockRdlock(&pTsdb->rwLock); - should = tsdbShouldDoRetentionImpl(pTsdb, now); - taosThreadRwlockUnlock(&pTsdb->rwLock); - return should; + +static int32_t tsdbDoMigrateFileObj(SRTNer *rtner, const STFileObj *fobj, const SDiskID *did) { + int32_t code = 0; + int32_t lino = 0; + STFileOp op = {0}; + + // remove old + op = (STFileOp){ + .optype = TSDB_FOP_REMOVE, + .fid = fobj->f->fid, + .of = fobj->f[0], + }; + + code = TARRAY2_APPEND(rtner->fopArr, op); + TSDB_CHECK_CODE(code, lino, _exit); + + // create new + op = (STFileOp){ + .optype = TSDB_FOP_CREATE, + .fid = fobj->f->fid, + .nf = + { + .type = fobj->f->type, + .did = did[0], + .fid = fobj->f->fid, + .cid = fobj->f->cid, + .size = fobj->f->size, + .stt[0] = + { + .level = fobj->f->stt[0].level, + }, + }, + }; + + code = TARRAY2_APPEND(rtner->fopArr, op); + TSDB_CHECK_CODE(code, lino, _exit); + + // do copy the file + code = tsdbDoCopyFile(rtner, fobj, &op.nf); + TSDB_CHECK_CODE(code, lino, _exit); + 
+_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code); + } + return code; } -int32_t tsdbDoRetention(STsdb *pTsdb, int64_t now) { +typedef struct { + STsdb *tsdb; + int32_t sync; + int64_t now; +} SRtnArg; + +static int32_t tsdbDoRetentionBegin(SRtnArg *arg, SRTNer *rtner) { int32_t code = 0; int32_t lino = 0; - STsdbFS fs = {0}; - code = tsdbFSCopy(pTsdb, &fs); + STsdb *tsdb = arg->tsdb; + + rtner->tsdb = tsdb; + rtner->szPage = tsdb->pVnode->config.tsdbPageSize; + rtner->now = arg->now; + rtner->cid = tsdbFSAllocEid(tsdb->pFS); + + code = tsdbFSCreateCopySnapshot(tsdb->pFS, &rtner->fsetArr); TSDB_CHECK_CODE(code, lino, _exit); - for (int32_t iSet = 0; iSet < taosArrayGetSize(fs.aDFileSet); iSet++) { - SDFileSet *pSet = (SDFileSet *)taosArrayGet(fs.aDFileSet, iSet); - int32_t expLevel = tsdbFidLevel(pSet->fid, &pTsdb->keepCfg, now); - SDiskID did; - - if (expLevel < 0) { - taosMemoryFree(pSet->pHeadF); - taosMemoryFree(pSet->pDataF); - taosMemoryFree(pSet->pSmaF); - for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) { - taosMemoryFree(pSet->aSttF[iStt]); +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code); + } else { + tsdbInfo("vid:%d, cid:%" PRId64 ", %s done", TD_VID(rtner->tsdb->pVnode), rtner->cid, __func__); + } + return code; +} + +static int32_t tsdbDoRetentionEnd(SRTNer *rtner) { + int32_t code = 0; + int32_t lino = 0; + + if (TARRAY2_SIZE(rtner->fopArr) == 0) goto _exit; + + code = tsdbFSEditBegin(rtner->tsdb->pFS, rtner->fopArr, TSDB_FEDIT_MERGE); + TSDB_CHECK_CODE(code, lino, _exit); + + taosThreadRwlockWrlock(&rtner->tsdb->rwLock); + + code = tsdbFSEditCommit(rtner->tsdb->pFS); + if (code) { + taosThreadRwlockUnlock(&rtner->tsdb->rwLock); + TSDB_CHECK_CODE(code, lino, _exit); + } + + taosThreadRwlockUnlock(&rtner->tsdb->rwLock); + + TARRAY2_DESTROY(rtner->fopArr, NULL); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code); + } else { + tsdbInfo("vid:%d, cid:%" PRId64 ", 
%s done", TD_VID(rtner->tsdb->pVnode), rtner->cid, __func__); + } + tsdbFSDestroyCopySnapshot(&rtner->fsetArr); + return code; +} + +static int32_t tsdbDoRetention2(void *arg) { + int32_t code = 0; + int32_t lino = 0; + SRTNer rtner[1] = {0}; + + code = tsdbDoRetentionBegin(arg, rtner); + TSDB_CHECK_CODE(code, lino, _exit); + + for (rtner->ctx->fsetArrIdx = 0; rtner->ctx->fsetArrIdx < TARRAY2_SIZE(rtner->fsetArr); rtner->ctx->fsetArrIdx++) { + rtner->ctx->fset = TARRAY2_GET(rtner->fsetArr, rtner->ctx->fsetArrIdx); + + STFileObj *fobj; + int32_t expLevel = tsdbFidLevel(rtner->ctx->fset->fid, &rtner->tsdb->keepCfg, rtner->now); + + if (expLevel < 0) { // remove the file set + for (int32_t ftype = 0; (ftype < TSDB_FTYPE_MAX) && (fobj = rtner->ctx->fset->farr[ftype], 1); ++ftype) { + if (fobj == NULL) continue; + + code = tsdbDoRemoveFileObject(rtner, fobj); + TSDB_CHECK_CODE(code, lino, _exit); + } + + SSttLvl *lvl; + TARRAY2_FOREACH(rtner->ctx->fset->lvlArr, lvl) { + TARRAY2_FOREACH(lvl->fobjArr, fobj) { + code = tsdbDoRemoveFileObject(rtner, fobj); + TSDB_CHECK_CODE(code, lino, _exit); + } } - taosArrayRemove(fs.aDFileSet, iSet); - iSet--; + } else if (expLevel == 0) { + continue; } else { - if (expLevel == 0) continue; - if (tfsAllocDisk(pTsdb->pVnode->pTfs, expLevel, &did) < 0) { + SDiskID did; + + if (tfsAllocDisk(rtner->tsdb->pVnode->pTfs, expLevel, &did) < 0) { code = terrno; - goto _exit; + TSDB_CHECK_CODE(code, lino, _exit); } + tfsMkdirRecurAt(rtner->tsdb->pVnode->pTfs, rtner->tsdb->path, did); - if (did.level == pSet->diskId.level) continue; + // data + for (int32_t ftype = 0; ftype < TSDB_FTYPE_MAX && (fobj = rtner->ctx->fset->farr[ftype], 1); ++ftype) { + if (fobj == NULL) continue; - // copy file to new disk (todo) - SDFileSet fSet = *pSet; - fSet.diskId = did; + if (fobj->f->did.level == did.level) continue; + code = tsdbDoMigrateFileObj(rtner, fobj, &did); + TSDB_CHECK_CODE(code, lino, _exit); + } - code = tsdbDFileSetCopy(pTsdb, pSet, &fSet); - 
TSDB_CHECK_CODE(code, lino, _exit); + // stt + SSttLvl *lvl; + TARRAY2_FOREACH(rtner->ctx->fset->lvlArr, lvl) { + TARRAY2_FOREACH(lvl->fobjArr, fobj) { + if (fobj->f->did.level == did.level) continue; - code = tsdbFSUpsertFSet(&fs, &fSet); - TSDB_CHECK_CODE(code, lino, _exit); + code = tsdbDoMigrateFileObj(rtner, fobj, &did); + TSDB_CHECK_CODE(code, lino, _exit); + } + } } } - // do change fs - code = tsdbFSPrepareCommit(pTsdb, &fs); + code = tsdbDoRetentionEnd(rtner); TSDB_CHECK_CODE(code, lino, _exit); _exit: if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); - } else { - tsdbInfo("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); + TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code); } - tsdbFSDestroy(&fs); return code; } -static int32_t tsdbCommitRetentionImpl(STsdb *pTsdb) { return tsdbFSCommit(pTsdb); } +static void tsdbFreeRtnArg(void *arg) { + SRtnArg *rArg = (SRtnArg *)arg; + if (rArg->sync) { + tsem_post(&rArg->tsdb->pVnode->canCommit); + } + taosMemoryFree(arg); +} + +int32_t tsdbRetention(STsdb *tsdb, int64_t now, int32_t sync) { + SRtnArg *arg = taosMemoryMalloc(sizeof(*arg)); + if (arg == NULL) return TSDB_CODE_OUT_OF_MEMORY; + arg->tsdb = tsdb; + arg->sync = sync; + arg->now = now; -int32_t tsdbCommitRetention(STsdb *pTsdb) { - taosThreadRwlockWrlock(&pTsdb->rwLock); - tsdbCommitRetentionImpl(pTsdb); - taosThreadRwlockUnlock(&pTsdb->rwLock); - tsdbInfo("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); - return 0; + if (sync) { + tsem_wait(&tsdb->pVnode->canCommit); + } + + int64_t taskid; + int32_t code = + tsdbFSScheduleBgTask(tsdb->pFS, TSDB_BG_TASK_RETENTION, tsdbDoRetention2, tsdbFreeRtnArg, arg, &taskid); + if (code) { + tsdbFreeRtnArg(arg); + } + return code; } \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index 
df2aebe45b079ee0438325f1996a59866d4cd842..011b9bd5a4c78e3d5392a3ed379b906f695bbab3 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -14,553 +14,519 @@ */ #include "tsdb.h" +#include "tsdbDataFileRW.h" +#include "tsdbFS2.h" +#include "tsdbFSetRW.h" +#include "tsdbIter.h" +#include "tsdbSttFileRW.h" extern int32_t tsdbUpdateTableSchema(SMeta* pMeta, int64_t suid, int64_t uid, SSkmInfo* pSkmInfo); -extern int32_t tsdbWriteDataBlock(SDataFWriter* pWriter, SBlockData* pBlockData, SMapData* mDataBlk, int8_t cmprAlg); -extern int32_t tsdbWriteSttBlock(SDataFWriter* pWriter, SBlockData* pBlockData, SArray* aSttBlk, int8_t cmprAlg); // STsdbSnapReader ======================================== struct STsdbSnapReader { - STsdb* pTsdb; - int64_t sver; - int64_t ever; - int8_t type; + STsdb* tsdb; + int64_t sver; + int64_t ever; + int8_t type; + uint8_t* aBuf[5]; + SSkmInfo skmTb[1]; + + TFileSetArray* fsetArr; - STsdbFS fs; - TABLEID tbid; - SSkmInfo skmTable; - - // timeseries data - int8_t dataDone; - int32_t fid; - - SDataFReader* pDataFReader; - STsdbDataIter2* iterList; - STsdbDataIter2* pIter; - SRBTree rbt; - SBlockData bData; - - // tombstone data - int8_t delDone; - SDelFReader* pDelFReader; - STsdbDataIter2* pTIter; - SArray* aDelData; + // context + struct { + int32_t fsetArrIdx; + STFileSet* fset; + bool isDataDone; + bool isTombDone; + } ctx[1]; + + // reader + SDataFileReader* dataReader; + TSttFileReaderArray sttReaderArr[1]; + + // iter + TTsdbIterArray dataIterArr[1]; + SIterMerger* dataIterMerger; + TTsdbIterArray tombIterArr[1]; + SIterMerger* tombIterMerger; + + // data + SBlockData blockData[1]; + STombBlock tombBlock[1]; }; -static int32_t tsdbSnapReadFileDataStart(STsdbSnapReader* pReader) { +static int32_t tsdbSnapReadFileSetOpenReader(STsdbSnapReader* reader) { int32_t code = 0; int32_t lino = 0; - SDFileSet* pSet = taosArraySearch(pReader->fs.aDFileSet, &(SDFileSet){.fid = pReader->fid}, 
tDFileSetCmprFn, TD_GT); - if (pSet == NULL) { - pReader->fid = INT32_MAX; - goto _exit; + ASSERT(reader->dataReader == NULL); + ASSERT(TARRAY2_SIZE(reader->sttReaderArr) == 0); + + // data + SDataFileReaderConfig config = { + .tsdb = reader->tsdb, + .szPage = reader->tsdb->pVnode->config.tsdbPageSize, + .bufArr = reader->aBuf, + }; + bool hasDataFile = false; + for (int32_t ftype = 0; ftype < TSDB_FTYPE_MAX; ftype++) { + if (reader->ctx->fset->farr[ftype] != NULL) { + hasDataFile = true; + config.files[ftype].exist = true; + config.files[ftype].file = reader->ctx->fset->farr[ftype]->f[0]; + } } - pReader->fid = pSet->fid; + if (hasDataFile) { + code = tsdbDataFileReaderOpen(NULL, &config, &reader->dataReader); + TSDB_CHECK_CODE(code, lino, _exit); + } - tRBTreeCreate(&pReader->rbt, tsdbDataIterCmprFn); + // stt + SSttLvl* lvl; + TARRAY2_FOREACH(reader->ctx->fset->lvlArr, lvl) { + STFileObj* fobj; + TARRAY2_FOREACH(lvl->fobjArr, fobj) { + SSttFileReader* sttReader; + SSttFileReaderConfig config = { + .tsdb = reader->tsdb, + .szPage = reader->tsdb->pVnode->config.tsdbPageSize, + .file = fobj->f[0], + .bufArr = reader->aBuf, + }; + + code = tsdbSttFileReaderOpen(fobj->fname, &config, &sttReader); + TSDB_CHECK_CODE(code, lino, _exit); - code = tsdbDataFReaderOpen(&pReader->pDataFReader, pReader->pTsdb, pSet); - TSDB_CHECK_CODE(code, lino, _exit); + code = TARRAY2_APPEND(reader->sttReaderArr, sttReader); + TSDB_CHECK_CODE(code, lino, _exit); + } + } - code = tsdbOpenDataFileDataIter(pReader->pDataFReader, &pReader->pIter); - TSDB_CHECK_CODE(code, lino, _exit); +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->tsdb->pVnode), code, lino); + } + return code; +} + +static int32_t tsdbSnapReadFileSetCloseReader(STsdbSnapReader* reader) { + int32_t code = 0; + int32_t lino = 0; + + TARRAY2_CLEAR(reader->sttReaderArr, tsdbSttFileReaderClose); + tsdbDataFileReaderClose(&reader->dataReader); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->tsdb->pVnode), code, lino); 
+ } + return code; +} + +static int32_t tsdbSnapReadFileSetOpenIter(STsdbSnapReader* reader) { + int32_t code = 0; + int32_t lino = 0; + + ASSERT(reader->dataIterMerger == NULL); + ASSERT(reader->tombIterMerger == NULL); + ASSERT(TARRAY2_SIZE(reader->dataIterArr) == 0); + ASSERT(TARRAY2_SIZE(reader->tombIterArr) == 0); + + STsdbIter* iter; + STsdbIterConfig config = { + .filterByVersion = true, + .verRange[0] = reader->sver, + .verRange[1] = reader->ever, + }; + + // data file + if (reader->dataReader) { + // data + config.type = TSDB_ITER_TYPE_DATA; + config.dataReader = reader->dataReader; + + code = tsdbIterOpen(&config, &iter); + TSDB_CHECK_CODE(code, lino, _exit); - if (pReader->pIter) { - // iter to next with filter info (sver, ever) - code = tsdbDataIterNext2( - pReader->pIter, - &(STsdbFilterInfo){.flag = TSDB_FILTER_FLAG_BY_VERSION | TSDB_FILTER_FLAG_IGNORE_DROPPED_TABLE, // flag - .sver = pReader->sver, - .ever = pReader->ever}); + code = TARRAY2_APPEND(reader->dataIterArr, iter); TSDB_CHECK_CODE(code, lino, _exit); - if (pReader->pIter->rowInfo.suid || pReader->pIter->rowInfo.uid) { - // add to rbtree - tRBTreePut(&pReader->rbt, &pReader->pIter->rbtn); + // tomb + config.type = TSDB_ITER_TYPE_DATA_TOMB; + config.dataReader = reader->dataReader; - // add to iterList - pReader->pIter->next = pReader->iterList; - pReader->iterList = pReader->pIter; - } else { - tsdbCloseDataIter2(pReader->pIter); - } + code = tsdbIterOpen(&config, &iter); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND(reader->tombIterArr, iter); + TSDB_CHECK_CODE(code, lino, _exit); } - for (int32_t iStt = 0; iStt < pSet->nSttF; ++iStt) { - code = tsdbOpenSttFileDataIter(pReader->pDataFReader, iStt, &pReader->pIter); + // stt file + SSttFileReader* sttReader; + TARRAY2_FOREACH(reader->sttReaderArr, sttReader) { + // data + config.type = TSDB_ITER_TYPE_STT; + config.sttReader = sttReader; + + code = tsdbIterOpen(&config, &iter); TSDB_CHECK_CODE(code, lino, _exit); - if 
(pReader->pIter) { - // iter to valid row - code = tsdbDataIterNext2( - pReader->pIter, - &(STsdbFilterInfo){.flag = TSDB_FILTER_FLAG_BY_VERSION | TSDB_FILTER_FLAG_IGNORE_DROPPED_TABLE, // flag - .sver = pReader->sver, - .ever = pReader->ever}); - TSDB_CHECK_CODE(code, lino, _exit); + code = TARRAY2_APPEND(reader->dataIterArr, iter); + TSDB_CHECK_CODE(code, lino, _exit); - if (pReader->pIter->rowInfo.suid || pReader->pIter->rowInfo.uid) { - // add to rbtree - tRBTreePut(&pReader->rbt, &pReader->pIter->rbtn); + // tomb + config.type = TSDB_ITER_TYPE_STT_TOMB; + config.sttReader = sttReader; - // add to iterList - pReader->pIter->next = pReader->iterList; - pReader->iterList = pReader->pIter; - } else { - tsdbCloseDataIter2(pReader->pIter); - } - } + code = tsdbIterOpen(&config, &iter); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND(reader->tombIterArr, iter); + TSDB_CHECK_CODE(code, lino, _exit); } - pReader->pIter = NULL; + // merger + code = tsdbIterMergerOpen(reader->dataIterArr, &reader->dataIterMerger, false); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbIterMergerOpen(reader->tombIterArr, &reader->tombIterMerger, true); + TSDB_CHECK_CODE(code, lino, _exit); _exit: if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code)); - } else { - tsdbInfo("vgId:%d %s done, fid:%d", TD_VID(pReader->pTsdb->pVnode), __func__, pReader->fid); + TSDB_ERROR_LOG(TD_VID(reader->tsdb->pVnode), code, lino); } return code; } -static void tsdbSnapReadFileDataEnd(STsdbSnapReader* pReader) { - while (pReader->iterList) { - STsdbDataIter2* pIter = pReader->iterList; - pReader->iterList = pIter->next; - tsdbCloseDataIter2(pIter); - } - - tsdbDataFReaderClose(&pReader->pDataFReader); +static int32_t tsdbSnapReadFileSetCloseIter(STsdbSnapReader* reader) { + tsdbIterMergerClose(&reader->dataIterMerger); + tsdbIterMergerClose(&reader->tombIterMerger); + TARRAY2_CLEAR(reader->dataIterArr, 
tsdbIterClose); + TARRAY2_CLEAR(reader->tombIterArr, tsdbIterClose); + return 0; } -static int32_t tsdbSnapReadNextRow(STsdbSnapReader* pReader, SRowInfo** ppRowInfo) { +static int32_t tsdbSnapReadFileSetBegin(STsdbSnapReader* reader) { int32_t code = 0; int32_t lino = 0; - if (pReader->pIter) { - code = tsdbDataIterNext2(pReader->pIter, &(STsdbFilterInfo){.flag = TSDB_FILTER_FLAG_BY_VERSION | - TSDB_FILTER_FLAG_IGNORE_DROPPED_TABLE, // flag - .sver = pReader->sver, - .ever = pReader->ever}); - TSDB_CHECK_CODE(code, lino, _exit); + ASSERT(reader->ctx->fset == NULL); - if (pReader->pIter->rowInfo.suid == 0 && pReader->pIter->rowInfo.uid == 0) { - pReader->pIter = NULL; - } else { - SRBTreeNode* pNode = tRBTreeMin(&pReader->rbt); - if (pNode) { - int32_t c = tsdbDataIterCmprFn(&pReader->pIter->rbtn, pNode); - if (c > 0) { - tRBTreePut(&pReader->rbt, &pReader->pIter->rbtn); - pReader->pIter = NULL; - } else if (c == 0) { - ASSERT(0); - } - } - } - } + if (reader->ctx->fsetArrIdx < TARRAY2_SIZE(reader->fsetArr)) { + reader->ctx->fset = TARRAY2_GET(reader->fsetArr, reader->ctx->fsetArrIdx++); + reader->ctx->isDataDone = false; + reader->ctx->isTombDone = false; - if (pReader->pIter == NULL) { - SRBTreeNode* pNode = tRBTreeMin(&pReader->rbt); - if (pNode) { - tRBTreeDrop(&pReader->rbt, pNode); - pReader->pIter = TSDB_RBTN_TO_DATA_ITER(pNode); - } - } + code = tsdbSnapReadFileSetOpenReader(reader); + TSDB_CHECK_CODE(code, lino, _exit); - if (ppRowInfo) { - if (pReader->pIter) { - *ppRowInfo = &pReader->pIter->rowInfo; - } else { - *ppRowInfo = NULL; - } + code = tsdbSnapReadFileSetOpenIter(reader); + TSDB_CHECK_CODE(code, lino, _exit); } _exit: if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code)); + TSDB_ERROR_LOG(TD_VID(reader->tsdb->pVnode), code, lino); } return code; } -static int32_t tsdbSnapReadGetRow(STsdbSnapReader* pReader, SRowInfo** ppRowInfo) { - if (pReader->pIter) { - *ppRowInfo = 
&pReader->pIter->rowInfo; - return 0; - } - - return tsdbSnapReadNextRow(pReader, ppRowInfo); +static int32_t tsdbSnapReadFileSetEnd(STsdbSnapReader* reader) { + tsdbSnapReadFileSetCloseIter(reader); + tsdbSnapReadFileSetCloseReader(reader); + reader->ctx->fset = NULL; + return 0; } -static int32_t tsdbSnapCmprData(STsdbSnapReader* pReader, uint8_t** ppData) { +static int32_t tsdbSnapCmprData(STsdbSnapReader* reader, uint8_t** data) { int32_t code = 0; - - ASSERT(pReader->bData.nRow); + int32_t lino = 0; int32_t aBufN[5] = {0}; - code = tCmprBlockData(&pReader->bData, NO_COMPRESSION, NULL, NULL, pReader->aBuf, aBufN); - if (code) goto _exit; + code = tCmprBlockData(reader->blockData, NO_COMPRESSION, NULL, NULL, reader->aBuf, aBufN); + TSDB_CHECK_CODE(code, lino, _exit); int32_t size = aBufN[0] + aBufN[1] + aBufN[2] + aBufN[3]; - *ppData = taosMemoryMalloc(sizeof(SSnapDataHdr) + size); - if (*ppData == NULL) { + *data = taosMemoryMalloc(sizeof(SSnapDataHdr) + size); + if (*data == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; - goto _exit; + TSDB_CHECK_CODE(code, lino, _exit); } - SSnapDataHdr* pHdr = (SSnapDataHdr*)*ppData; - pHdr->type = pReader->type; + SSnapDataHdr* pHdr = (SSnapDataHdr*)*data; + pHdr->type = reader->type; pHdr->size = size; - memcpy(pHdr->data, pReader->aBuf[3], aBufN[3]); - memcpy(pHdr->data + aBufN[3], pReader->aBuf[2], aBufN[2]); + memcpy(pHdr->data, reader->aBuf[3], aBufN[3]); + memcpy(pHdr->data + aBufN[3], reader->aBuf[2], aBufN[2]); if (aBufN[1]) { - memcpy(pHdr->data + aBufN[3] + aBufN[2], pReader->aBuf[1], aBufN[1]); + memcpy(pHdr->data + aBufN[3] + aBufN[2], reader->aBuf[1], aBufN[1]); } if (aBufN[0]) { - memcpy(pHdr->data + aBufN[3] + aBufN[2] + aBufN[1], pReader->aBuf[0], aBufN[0]); + memcpy(pHdr->data + aBufN[3] + aBufN[2] + aBufN[1], reader->aBuf[0], aBufN[0]); } _exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->tsdb->pVnode), lino, code); + } return code; } -static int32_t tsdbSnapReadTimeSeriesData(STsdbSnapReader* pReader, 
uint8_t** ppData) { - int32_t code = 0; - int32_t lino = 0; - - STsdb* pTsdb = pReader->pTsdb; +static int32_t tsdbSnapReadTimeSeriesData(STsdbSnapReader* reader, uint8_t** data) { + int32_t code = 0; + int32_t lino = 0; + SMetaInfo info; + + tBlockDataReset(reader->blockData); + + TABLEID tbid[1] = {0}; + for (SRowInfo* row; (row = tsdbIterMergerGetData(reader->dataIterMerger));) { + // skip dropped table + if (row->uid != tbid->uid) { + tbid->suid = row->suid; + tbid->uid = row->uid; + if (metaGetInfo(reader->tsdb->pVnode->pMeta, tbid->uid, &info, NULL) != 0) { + code = tsdbIterMergerSkipTableData(reader->dataIterMerger, tbid); + TSDB_CHECK_CODE(code, lino, _exit); + continue; + } + } - tBlockDataReset(&pReader->bData); + if (reader->blockData->suid == 0 && reader->blockData->uid == 0) { + code = tsdbUpdateSkmTb(reader->tsdb, (TABLEID*)row, reader->skmTb); + TSDB_CHECK_CODE(code, lino, _exit); - for (;;) { - // start a new file read if need - if (pReader->pDataFReader == NULL) { - code = tsdbSnapReadFileDataStart(pReader); + TABLEID tbid1 = { + .suid = row->suid, + .uid = row->suid ? 
0 : row->uid, + }; + code = tBlockDataInit(reader->blockData, &tbid1, reader->skmTb->pTSchema, NULL, 0); TSDB_CHECK_CODE(code, lino, _exit); } - if (pReader->pDataFReader == NULL) break; - - SRowInfo* pRowInfo; - code = tsdbSnapReadGetRow(pReader, &pRowInfo); - TSDB_CHECK_CODE(code, lino, _exit); - - if (pRowInfo == NULL) { - tsdbSnapReadFileDataEnd(pReader); - continue; + if (!TABLE_SAME_SCHEMA(reader->blockData->suid, reader->blockData->uid, row->suid, row->uid)) { + break; } - code = tsdbUpdateTableSchema(pTsdb->pVnode->pMeta, pRowInfo->suid, pRowInfo->uid, &pReader->skmTable); + code = tBlockDataAppendRow(reader->blockData, &row->row, NULL, row->uid); TSDB_CHECK_CODE(code, lino, _exit); - code = tBlockDataInit(&pReader->bData, (TABLEID*)pRowInfo, pReader->skmTable.pTSchema, NULL, 0); + code = tsdbIterMergerNext(reader->dataIterMerger); TSDB_CHECK_CODE(code, lino, _exit); - do { - if (!TABLE_SAME_SCHEMA(pReader->bData.suid, pReader->bData.uid, pRowInfo->suid, pRowInfo->uid)) break; - - if (pReader->bData.uid && pReader->bData.uid != pRowInfo->uid) { - code = tRealloc((uint8_t**)&pReader->bData.aUid, sizeof(int64_t) * (pReader->bData.nRow + 1)); - TSDB_CHECK_CODE(code, lino, _exit); - - for (int32_t iRow = 0; iRow < pReader->bData.nRow; ++iRow) { - pReader->bData.aUid[iRow] = pReader->bData.uid; - } - pReader->bData.uid = 0; - } - - code = tBlockDataAppendRow(&pReader->bData, &pRowInfo->row, NULL, pRowInfo->uid); - TSDB_CHECK_CODE(code, lino, _exit); - - code = tsdbSnapReadNextRow(pReader, &pRowInfo); - TSDB_CHECK_CODE(code, lino, _exit); - - if (pReader->bData.nRow >= 81920) break; - } while (pRowInfo); - - ASSERT(pReader->bData.nRow > 0); - - break; + if (reader->blockData->nRow >= 81920) { + break; + } } - if (pReader->bData.nRow > 0) { - ASSERT(pReader->bData.suid || pReader->bData.uid); - - code = tsdbSnapCmprData(pReader, ppData); + if (reader->blockData->nRow > 0) { + ASSERT(reader->blockData->suid || reader->blockData->uid); + code = 
tsdbSnapCmprData(reader, data); TSDB_CHECK_CODE(code, lino, _exit); } _exit: if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); + TSDB_ERROR_LOG(TD_VID(reader->tsdb->pVnode), code, lino); } return code; } -static int32_t tsdbSnapCmprTombData(STsdbSnapReader* pReader, uint8_t** ppData) { +static int32_t tsdbSnapCmprTombData(STsdbSnapReader* reader, uint8_t** data) { int32_t code = 0; int32_t lino = 0; - int64_t size = sizeof(TABLEID); - for (int32_t iDelData = 0; iDelData < taosArrayGetSize(pReader->aDelData); ++iDelData) { - size += tPutDelData(NULL, taosArrayGet(pReader->aDelData, iDelData)); + int64_t size = sizeof(SSnapDataHdr); + for (int32_t i = 0; i < ARRAY_SIZE(reader->tombBlock->dataArr); i++) { + size += TARRAY2_DATA_LEN(reader->tombBlock->dataArr + i); } - uint8_t* pData = (uint8_t*)taosMemoryMalloc(sizeof(SSnapDataHdr) + size); - if (pData == NULL) { + data[0] = taosMemoryMalloc(size); + if (data[0] == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; TSDB_CHECK_CODE(code, lino, _exit); } - SSnapDataHdr* pHdr = (SSnapDataHdr*)pData; - pHdr->type = SNAP_DATA_DEL; - pHdr->size = size; + SSnapDataHdr* hdr = (SSnapDataHdr*)data[0]; + hdr->type = SNAP_DATA_DEL; + hdr->size = size; - TABLEID* pId = (TABLEID*)(pData + sizeof(SSnapDataHdr)); - *pId = pReader->tbid; - - size = sizeof(SSnapDataHdr) + sizeof(TABLEID); - for (int32_t iDelData = 0; iDelData < taosArrayGetSize(pReader->aDelData); ++iDelData) { - size += tPutDelData(pData + size, taosArrayGet(pReader->aDelData, iDelData)); + uint8_t* tdata = hdr->data; + for (int32_t i = 0; i < ARRAY_SIZE(reader->tombBlock->dataArr); i++) { + memcpy(tdata, TARRAY2_DATA(reader->tombBlock->dataArr + i), TARRAY2_DATA_LEN(reader->tombBlock->dataArr + i)); + tdata += TARRAY2_DATA_LEN(reader->tombBlock->dataArr + i); } _exit: if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code)); + 
TSDB_ERROR_LOG(TD_VID(reader->tsdb->pVnode), code, lino); } - *ppData = pData; return code; } -static void tsdbSnapReadGetTombData(STsdbSnapReader* pReader, SDelInfo** ppDelInfo) { - if (pReader->pTIter == NULL || (pReader->pTIter->delInfo.suid == 0 && pReader->pTIter->delInfo.uid == 0)) { - *ppDelInfo = NULL; - } else { - *ppDelInfo = &pReader->pTIter->delInfo; - } -} - -static int32_t tsdbSnapReadNextTombData(STsdbSnapReader* pReader, SDelInfo** ppDelInfo) { - int32_t code = 0; - int32_t lino = 0; +static int32_t tsdbSnapReadTombData(STsdbSnapReader* reader, uint8_t** data) { + int32_t code = 0; + int32_t lino = 0; + SMetaInfo info; - code = tsdbDataIterNext2( - pReader->pTIter, &(STsdbFilterInfo){.flag = TSDB_FILTER_FLAG_BY_VERSION | TSDB_FILTER_FLAG_IGNORE_DROPPED_TABLE, - .sver = pReader->sver, - .ever = pReader->ever}); - TSDB_CHECK_CODE(code, lino, _exit); + tTombBlockClear(reader->tombBlock); - if (ppDelInfo) { - tsdbSnapReadGetTombData(pReader, ppDelInfo); - } - -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code)); - } - return code; -} - -static int32_t tsdbSnapReadTombData(STsdbSnapReader* pReader, uint8_t** ppData) { - int32_t code = 0; - int32_t lino = 0; - - STsdb* pTsdb = pReader->pTsdb; - - // open tombstone data iter if need - if (pReader->pDelFReader == NULL) { - if (pReader->fs.pDelFile == NULL) goto _exit; - - // open - code = tsdbDelFReaderOpen(&pReader->pDelFReader, pReader->fs.pDelFile, pTsdb); - TSDB_CHECK_CODE(code, lino, _exit); - - code = tsdbOpenTombFileDataIter(pReader->pDelFReader, &pReader->pTIter); - TSDB_CHECK_CODE(code, lino, _exit); - - if (pReader->pTIter) { - code = tsdbSnapReadNextTombData(pReader, NULL); - TSDB_CHECK_CODE(code, lino, _exit); + TABLEID tbid[1] = {0}; + for (STombRecord* record; (record = tsdbIterMergerGetTombRecord(reader->tombIterMerger)) != NULL;) { + if (record->uid != tbid->uid) { + tbid->suid = record->suid; + tbid->uid 
= record->uid; + if (metaGetInfo(reader->tsdb->pVnode->pMeta, tbid->uid, &info, NULL) != 0) { + code = tsdbIterMergerSkipTableData(reader->tombIterMerger, tbid); + TSDB_CHECK_CODE(code, lino, _exit); + continue; + } } - } - - // loop to get tombstone data - SDelInfo* pDelInfo; - tsdbSnapReadGetTombData(pReader, &pDelInfo); - if (pDelInfo == NULL) goto _exit; - - pReader->tbid = *(TABLEID*)pDelInfo; - - if (pReader->aDelData) { - taosArrayClear(pReader->aDelData); - } else if ((pReader->aDelData = taosArrayInit(16, sizeof(SDelData))) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; + code = tTombBlockPut(reader->tombBlock, record); TSDB_CHECK_CODE(code, lino, _exit); - } - while (pDelInfo && pDelInfo->suid == pReader->tbid.suid && pDelInfo->uid == pReader->tbid.uid) { - if (taosArrayPush(pReader->aDelData, &pDelInfo->delData) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); + if (TOMB_BLOCK_SIZE(reader->tombBlock) >= 81920) { + break; } - - code = tsdbSnapReadNextTombData(pReader, &pDelInfo); - TSDB_CHECK_CODE(code, lino, _exit); } - // encode tombstone data - if (taosArrayGetSize(pReader->aDelData) > 0) { - code = tsdbSnapCmprTombData(pReader, ppData); + if (TOMB_BLOCK_SIZE(reader->tombBlock) > 0) { + code = tsdbSnapCmprTombData(reader, data); TSDB_CHECK_CODE(code, lino, _exit); } _exit: if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); - } else { - tsdbDebug("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); + TSDB_ERROR_LOG(TD_VID(reader->tsdb->pVnode), code, lino); } return code; } -int32_t tsdbSnapReaderOpen(STsdb* pTsdb, int64_t sver, int64_t ever, int8_t type, STsdbSnapReader** ppReader) { +int32_t tsdbSnapReaderOpen(STsdb* tsdb, int64_t sver, int64_t ever, int8_t type, STsdbSnapReader** reader) { int32_t code = 0; int32_t lino = 0; - // alloc - STsdbSnapReader* pReader = (STsdbSnapReader*)taosMemoryCalloc(1, sizeof(*pReader)); - if (pReader == NULL) { - 
code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - pReader->pTsdb = pTsdb; - pReader->sver = sver; - pReader->ever = ever; - pReader->type = type; - - taosThreadRwlockRdlock(&pTsdb->rwLock); - code = tsdbFSRef(pTsdb, &pReader->fs); - if (code) { - taosThreadRwlockUnlock(&pTsdb->rwLock); - TSDB_CHECK_CODE(code, lino, _exit); - } - taosThreadRwlockUnlock(&pTsdb->rwLock); + reader[0] = (STsdbSnapReader*)taosMemoryCalloc(1, sizeof(*reader[0])); + if (reader[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; - // init - pReader->fid = INT32_MIN; + reader[0]->tsdb = tsdb; + reader[0]->sver = sver; + reader[0]->ever = ever; + reader[0]->type = type; - code = tBlockDataCreate(&pReader->bData); + code = tsdbFSCreateRefSnapshot(tsdb->pFS, &reader[0]->fsetArr); TSDB_CHECK_CODE(code, lino, _exit); _exit: if (code) { - tsdbError("vgId:%d %s failed at line %d since %s, sver:%" PRId64 " ever:%" PRId64 " type:%d", TD_VID(pTsdb->pVnode), + tsdbError("vgId:%d %s failed at line %d since %s, sver:%" PRId64 " ever:%" PRId64 " type:%d", TD_VID(tsdb->pVnode), __func__, lino, tstrerror(code), sver, ever, type); - if (pReader) { - tBlockDataDestroy(&pReader->bData); - tsdbFSUnref(pTsdb, &pReader->fs); - taosMemoryFree(pReader); - pReader = NULL; - } + tsdbFSDestroyRefSnapshot(&reader[0]->fsetArr); + taosMemoryFree(reader[0]); + reader[0] = NULL; } else { - tsdbInfo("vgId:%d %s done, sver:%" PRId64 " ever:%" PRId64 " type:%d", TD_VID(pTsdb->pVnode), __func__, sver, ever, + tsdbInfo("vgId:%d %s done, sver:%" PRId64 " ever:%" PRId64 " type:%d", TD_VID(tsdb->pVnode), __func__, sver, ever, type); } - *ppReader = pReader; return code; } -int32_t tsdbSnapReaderClose(STsdbSnapReader** ppReader) { +int32_t tsdbSnapReaderClose(STsdbSnapReader** reader) { + if (reader[0] == NULL) return 0; + int32_t code = 0; int32_t lino = 0; - STsdbSnapReader* pReader = *ppReader; - STsdb* pTsdb = pReader->pTsdb; + STsdb* tsdb = reader[0]->tsdb; - // tombstone - if (pReader->pTIter) { - 
tsdbCloseDataIter2(pReader->pTIter); - pReader->pTIter = NULL; - } - if (pReader->pDelFReader) { - tsdbDelFReaderClose(&pReader->pDelFReader); - } - taosArrayDestroy(pReader->aDelData); + tTombBlockDestroy(reader[0]->tombBlock); + tBlockDataDestroy(reader[0]->blockData); - // timeseries - while (pReader->iterList) { - STsdbDataIter2* pIter = pReader->iterList; - pReader->iterList = pIter->next; - tsdbCloseDataIter2(pIter); - } - if (pReader->pDataFReader) { - tsdbDataFReaderClose(&pReader->pDataFReader); - } - tBlockDataDestroy(&pReader->bData); + tsdbIterMergerClose(&reader[0]->dataIterMerger); + tsdbIterMergerClose(&reader[0]->tombIterMerger); + TARRAY2_DESTROY(reader[0]->dataIterArr, tsdbIterClose); + TARRAY2_DESTROY(reader[0]->tombIterArr, tsdbIterClose); + TARRAY2_DESTROY(reader[0]->sttReaderArr, tsdbSttFileReaderClose); + tsdbDataFileReaderClose(&reader[0]->dataReader); + + tsdbFSDestroyRefSnapshot(&reader[0]->fsetArr); + tDestroyTSchema(reader[0]->skmTb->pTSchema); - // other - tDestroyTSchema(pReader->skmTable.pTSchema); - tsdbFSUnref(pReader->pTsdb, &pReader->fs); - for (int32_t iBuf = 0; iBuf < sizeof(pReader->aBuf) / sizeof(pReader->aBuf[0]); iBuf++) { - tFree(pReader->aBuf[iBuf]); + for (int32_t i = 0; i < ARRAY_SIZE(reader[0]->aBuf); ++i) { + tFree(reader[0]->aBuf[i]); } - taosMemoryFree(pReader); + + taosMemoryFree(reader[0]); + reader[0] = NULL; _exit: if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); } else { - tsdbDebug("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); + tsdbDebug("vgId:%d %s done", TD_VID(tsdb->pVnode), __func__); } - *ppReader = NULL; return code; } -int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData) { +int32_t tsdbSnapRead(STsdbSnapReader* reader, uint8_t** data) { int32_t code = 0; int32_t lino = 0; - *ppData = NULL; + data[0] = NULL; - // read data file - if (!pReader->dataDone) { - code 
= tsdbSnapReadTimeSeriesData(pReader, ppData); - TSDB_CHECK_CODE(code, lino, _exit); - if (*ppData) { - goto _exit; - } else { - pReader->dataDone = 1; + for (;;) { + if (reader->ctx->fset == NULL) { + code = tsdbSnapReadFileSetBegin(reader); + TSDB_CHECK_CODE(code, lino, _exit); + + if (reader->ctx->fset == NULL) { + break; + } } - } - // read del file - if (!pReader->delDone) { - code = tsdbSnapReadTombData(pReader, ppData); - TSDB_CHECK_CODE(code, lino, _exit); - if (*ppData) { - goto _exit; - } else { - pReader->delDone = 1; + if (!reader->ctx->isDataDone) { + code = tsdbSnapReadTimeSeriesData(reader, data); + TSDB_CHECK_CODE(code, lino, _exit); + if (data[0]) { + goto _exit; + } else { + reader->ctx->isDataDone = true; + } + } + + if (!reader->ctx->isTombDone) { + code = tsdbSnapReadTombData(reader, data); + TSDB_CHECK_CODE(code, lino, _exit); + if (data[0]) { + goto _exit; + } else { + reader->ctx->isTombDone = true; + } } + + code = tsdbSnapReadFileSetEnd(reader); + TSDB_CHECK_CODE(code, lino, _exit); } _exit: if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pReader->pTsdb->pVnode), __func__, lino, tstrerror(code)); + TSDB_ERROR_LOG(TD_VID(reader->tsdb->pVnode), code, lino); } else { - tsdbDebug("vgId:%d %s done", TD_VID(pReader->pTsdb->pVnode), __func__); + tsdbDebug("vgId:%d %s done", TD_VID(reader->tsdb->pVnode), __func__); } return code; } // STsdbSnapWriter ======================================== struct STsdbSnapWriter { - STsdb* pTsdb; + STsdb* tsdb; int64_t sver; int64_t ever; int32_t minutes; @@ -569,973 +535,595 @@ struct STsdbSnapWriter { int32_t maxRow; int8_t cmprAlg; int64_t commitID; + int32_t szPage; + int64_t compactVersion; + int64_t now; uint8_t* aBuf[5]; - STsdbFS fs; - TABLEID tbid; - - // time-series data - SBlockData inData; - - int32_t fid; - SSkmInfo skmTable; - - /* reader */ - SDataFReader* pDataFReader; - STsdbDataIter2* iterList; - STsdbDataIter2* pDIter; - STsdbDataIter2* pSIter; - SRBTree rbt; // SRBTree 
- - /* writer */ - SDataFWriter* pDataFWriter; - SArray* aBlockIdx; - SMapData mDataBlk; // SMapData - SArray* aSttBlk; // SArray - SBlockData bData; - SBlockData sData; - - // tombstone data - /* reader */ - SDelFReader* pDelFReader; - STsdbDataIter2* pTIter; - - /* writer */ - SDelFWriter* pDelFWriter; - SArray* aDelIdx; - SArray* aDelData; + TFileSetArray* fsetArr; + TFileOpArray fopArr[1]; + + struct { + bool fsetWriteBegin; + int32_t fid; + STFileSet* fset; + SDiskID did; + bool hasData; + bool hasTomb; + + // reader + SDataFileReader* dataReader; + TSttFileReaderArray sttReaderArr[1]; + + // iter/merger + TTsdbIterArray dataIterArr[1]; + SIterMerger* dataIterMerger; + TTsdbIterArray tombIterArr[1]; + SIterMerger* tombIterMerger; + + // writer + SFSetWriter* fsetWriter; + } ctx[1]; }; -// SNAP_DATA_TSDB -static int32_t tsdbSnapWriteTableDataStart(STsdbSnapWriter* pWriter, TABLEID* pId) { +// APIs +static int32_t tsdbSnapWriteTimeSeriesRow(STsdbSnapWriter* writer, SRowInfo* row) { int32_t code = 0; int32_t lino = 0; - if (pId) { - pWriter->tbid = *pId; - } else { - pWriter->tbid = (TABLEID){INT64_MAX, INT64_MAX}; - } - - if (pWriter->pDIter) { - STsdbDataIter2* pIter = pWriter->pDIter; - - // assert last table data end - ASSERT(pIter->dIter.iRow >= pIter->dIter.bData.nRow); - ASSERT(pIter->dIter.iDataBlk >= pIter->dIter.mDataBlk.nItem); - - for (;;) { - if (pIter->dIter.iBlockIdx >= taosArrayGetSize(pIter->dIter.aBlockIdx)) { - pWriter->pDIter = NULL; - break; - } - - SBlockIdx* pBlockIdx = (SBlockIdx*)taosArrayGet(pIter->dIter.aBlockIdx, pIter->dIter.iBlockIdx); - - int32_t c = tTABLEIDCmprFn(pBlockIdx, &pWriter->tbid); - if (c < 0) { - code = tsdbReadDataBlk(pIter->dIter.pReader, pBlockIdx, &pIter->dIter.mDataBlk); - TSDB_CHECK_CODE(code, lino, _exit); - - SBlockIdx* pNewBlockIdx = taosArrayReserve(pWriter->aBlockIdx, 1); - if (pNewBlockIdx == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - - pNewBlockIdx->suid = 
pBlockIdx->suid; - pNewBlockIdx->uid = pBlockIdx->uid; - - code = tsdbWriteDataBlk(pWriter->pDataFWriter, &pIter->dIter.mDataBlk, pNewBlockIdx); - TSDB_CHECK_CODE(code, lino, _exit); - - pIter->dIter.iBlockIdx++; - } else if (c == 0) { - code = tsdbReadDataBlk(pIter->dIter.pReader, pBlockIdx, &pIter->dIter.mDataBlk); - TSDB_CHECK_CODE(code, lino, _exit); - - pIter->dIter.iDataBlk = 0; - pIter->dIter.iBlockIdx++; - - break; - } else { - pIter->dIter.iDataBlk = pIter->dIter.mDataBlk.nItem; - break; - } + while (writer->ctx->hasData) { + SRowInfo* row1 = tsdbIterMergerGetData(writer->ctx->dataIterMerger); + if (row1 == NULL) { + writer->ctx->hasData = false; + break; } - } - if (pId) { - code = tsdbUpdateTableSchema(pWriter->pTsdb->pVnode->pMeta, pId->suid, pId->uid, &pWriter->skmTable); - TSDB_CHECK_CODE(code, lino, _exit); - - tMapDataReset(&pWriter->mDataBlk); - - code = tBlockDataInit(&pWriter->bData, pId, pWriter->skmTable.pTSchema, NULL, 0); - TSDB_CHECK_CODE(code, lino, _exit); - } - - if (!TABLE_SAME_SCHEMA(pWriter->tbid.suid, pWriter->tbid.uid, pWriter->sData.suid, pWriter->sData.uid)) { - if ((pWriter->sData.nRow > 0)) { - code = tsdbWriteSttBlock(pWriter->pDataFWriter, &pWriter->sData, pWriter->aSttBlk, pWriter->cmprAlg); + int32_t c = tRowInfoCmprFn(row1, row); + if (c <= 0) { + code = tsdbFSetWriteRow(writer->ctx->fsetWriter, row1); TSDB_CHECK_CODE(code, lino, _exit); - } - if (pId) { - TABLEID id = {.suid = pWriter->tbid.suid, .uid = pWriter->tbid.suid ? 
0 : pWriter->tbid.uid}; - code = tBlockDataInit(&pWriter->sData, &id, pWriter->skmTable.pTSchema, NULL, 0); + code = tsdbIterMergerNext(writer->ctx->dataIterMerger); TSDB_CHECK_CODE(code, lino, _exit); + } else { + break; } } -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); - } else { - tsdbTrace("vgId:%d %s done, suid:%" PRId64 " uid:%" PRId64, TD_VID(pWriter->pTsdb->pVnode), __func__, - pWriter->tbid.suid, pWriter->tbid.uid); + if (row->suid == INT64_MAX) { + ASSERT(writer->ctx->hasData == false); + goto _exit; } - return code; -} -static int32_t tsdbSnapWriteTableRowImpl(STsdbSnapWriter* pWriter, TSDBROW* pRow) { - int32_t code = 0; - int32_t lino = 0; - - code = tBlockDataAppendRow(&pWriter->bData, pRow, pWriter->skmTable.pTSchema, pWriter->tbid.uid); + code = tsdbFSetWriteRow(writer->ctx->fsetWriter, row); TSDB_CHECK_CODE(code, lino, _exit); - if (pWriter->bData.nRow >= pWriter->maxRow) { - code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg); - TSDB_CHECK_CODE(code, lino, _exit); - } - _exit: if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + TSDB_ERROR_LOG(TD_VID(writer->tsdb->pVnode), lino, code); } return code; } -static int32_t tsdbSnapWriteTableRow(STsdbSnapWriter* pWriter, TSDBROW* pRow) { +static int32_t tsdbSnapWriteFileSetOpenReader(STsdbSnapWriter* writer) { int32_t code = 0; int32_t lino = 0; - TSDBKEY inKey = pRow ? 
TSDBROW_KEY(pRow) : TSDBKEY_MAX; + ASSERT(writer->ctx->dataReader == NULL); + ASSERT(TARRAY2_SIZE(writer->ctx->sttReaderArr) == 0); - if (pWriter->pDIter == NULL || (pWriter->pDIter->dIter.iRow >= pWriter->pDIter->dIter.bData.nRow && - pWriter->pDIter->dIter.iDataBlk >= pWriter->pDIter->dIter.mDataBlk.nItem)) { - goto _write_row; - } else { - for (;;) { - while (pWriter->pDIter->dIter.iRow < pWriter->pDIter->dIter.bData.nRow) { - TSDBROW row = tsdbRowFromBlockData(&pWriter->pDIter->dIter.bData, pWriter->pDIter->dIter.iRow); - - int32_t c = tsdbKeyCmprFn(&inKey, &TSDBROW_KEY(&row)); - if (c < 0) { - goto _write_row; - } else if (c > 0) { - code = tsdbSnapWriteTableRowImpl(pWriter, &row); - TSDB_CHECK_CODE(code, lino, _exit); - - pWriter->pDIter->dIter.iRow++; - } else { - ASSERT(0); - } - } + if (writer->ctx->fset) { + // open data reader + SDataFileReaderConfig dataFileReaderConfig = { + .tsdb = writer->tsdb, + .bufArr = writer->aBuf, + .szPage = writer->szPage, + }; - for (;;) { - if (pWriter->pDIter->dIter.iDataBlk >= pWriter->pDIter->dIter.mDataBlk.nItem) goto _write_row; - - // FIXME: Here can be slow, use array instead - SDataBlk dataBlk; - tMapDataGetItemByIdx(&pWriter->pDIter->dIter.mDataBlk, pWriter->pDIter->dIter.iDataBlk, &dataBlk, tGetDataBlk); - - int32_t c = tDataBlkCmprFn(&dataBlk, &(SDataBlk){.minKey = inKey, .maxKey = inKey}); - if (c > 0) { - goto _write_row; - } else if (c < 0) { - if (pWriter->bData.nRow > 0) { - code = tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg); - TSDB_CHECK_CODE(code, lino, _exit); - } - - tMapDataPutItem(&pWriter->mDataBlk, &dataBlk, tPutDataBlk); - pWriter->pDIter->dIter.iDataBlk++; - } else { - code = tsdbReadDataBlockEx(pWriter->pDataFReader, &dataBlk, &pWriter->pDIter->dIter.bData); - TSDB_CHECK_CODE(code, lino, _exit); - - pWriter->pDIter->dIter.iRow = 0; - pWriter->pDIter->dIter.iDataBlk++; - break; - } + for (int32_t ftype = 0; ftype < TSDB_FTYPE_MAX; ++ftype) { + if 
(writer->ctx->fset->farr[ftype] == NULL) { + continue; } + + dataFileReaderConfig.files[ftype].exist = true; + dataFileReaderConfig.files[ftype].file = writer->ctx->fset->farr[ftype]->f[0]; } - } -_write_row: - if (pRow) { - code = tsdbSnapWriteTableRowImpl(pWriter, pRow); + code = tsdbDataFileReaderOpen(NULL, &dataFileReaderConfig, &writer->ctx->dataReader); TSDB_CHECK_CODE(code, lino, _exit); - } - -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); - } - return code; -} - -static int32_t tsdbSnapWriteTableDataEnd(STsdbSnapWriter* pWriter) { - int32_t code = 0; - int32_t lino = 0; - // write a NULL row to end current table data write - code = tsdbSnapWriteTableRow(pWriter, NULL); - TSDB_CHECK_CODE(code, lino, _exit); - - if (pWriter->bData.nRow > 0) { - if (pWriter->bData.nRow < pWriter->minRow) { - ASSERT(TABLE_SAME_SCHEMA(pWriter->sData.suid, pWriter->sData.uid, pWriter->tbid.suid, pWriter->tbid.uid)); - for (int32_t iRow = 0; iRow < pWriter->bData.nRow; iRow++) { - code = - tBlockDataAppendRow(&pWriter->sData, &tsdbRowFromBlockData(&pWriter->bData, iRow), NULL, pWriter->tbid.uid); + // open stt reader array + SSttLvl* lvl; + TARRAY2_FOREACH(writer->ctx->fset->lvlArr, lvl) { + STFileObj* fobj; + TARRAY2_FOREACH(lvl->fobjArr, fobj) { + SSttFileReader* reader; + SSttFileReaderConfig sttFileReaderConfig = { + .tsdb = writer->tsdb, + .szPage = writer->szPage, + .bufArr = writer->aBuf, + .file = fobj->f[0], + }; + + code = tsdbSttFileReaderOpen(fobj->fname, &sttFileReaderConfig, &reader); TSDB_CHECK_CODE(code, lino, _exit); - if (pWriter->sData.nRow >= pWriter->maxRow) { - code = tsdbWriteSttBlock(pWriter->pDataFWriter, &pWriter->sData, pWriter->aSttBlk, pWriter->cmprAlg); - TSDB_CHECK_CODE(code, lino, _exit); - } + code = TARRAY2_APPEND(writer->ctx->sttReaderArr, reader); + TSDB_CHECK_CODE(code, lino, _exit); } - - tBlockDataClear(&pWriter->bData); - } else { - code = 
tsdbWriteDataBlock(pWriter->pDataFWriter, &pWriter->bData, &pWriter->mDataBlk, pWriter->cmprAlg); - TSDB_CHECK_CODE(code, lino, _exit); - } - } - - if (pWriter->mDataBlk.nItem) { - SBlockIdx* pBlockIdx = taosArrayReserve(pWriter->aBlockIdx, 1); - if (pBlockIdx == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); } - - pBlockIdx->suid = pWriter->tbid.suid; - pBlockIdx->uid = pWriter->tbid.uid; - - code = tsdbWriteDataBlk(pWriter->pDataFWriter, &pWriter->mDataBlk, pBlockIdx); - TSDB_CHECK_CODE(code, lino, _exit); } _exit: if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + TSDB_ERROR_LOG(TD_VID(writer->tsdb->pVnode), lino, code); } return code; } -static int32_t tsdbSnapWriteFileDataStart(STsdbSnapWriter* pWriter, int32_t fid) { +static int32_t tsdbSnapWriteFileSetCloseReader(STsdbSnapWriter* writer) { + TARRAY2_CLEAR(writer->ctx->sttReaderArr, tsdbSttFileReaderClose); + tsdbDataFileReaderClose(&writer->ctx->dataReader); + return 0; +} + +static int32_t tsdbSnapWriteFileSetOpenIter(STsdbSnapWriter* writer) { int32_t code = 0; int32_t lino = 0; - ASSERT(pWriter->pDataFWriter == NULL && pWriter->fid < fid); - - STsdb* pTsdb = pWriter->pTsdb; + // data ieter + if (writer->ctx->dataReader) { + STsdbIter* iter; + STsdbIterConfig config = {0}; - pWriter->fid = fid; - pWriter->tbid = (TABLEID){0}; - SDFileSet* pSet = taosArraySearch(pWriter->fs.aDFileSet, &(SDFileSet){.fid = fid}, tDFileSetCmprFn, TD_EQ); + // data + config.type = TSDB_ITER_TYPE_DATA; + config.dataReader = writer->ctx->dataReader; - // open reader - pWriter->pDataFReader = NULL; - pWriter->iterList = NULL; - pWriter->pDIter = NULL; - pWriter->pSIter = NULL; - tRBTreeCreate(&pWriter->rbt, tsdbDataIterCmprFn); - if (pSet) { - code = tsdbDataFReaderOpen(&pWriter->pDataFReader, pTsdb, pSet); + code = tsdbIterOpen(&config, &iter); TSDB_CHECK_CODE(code, lino, _exit); - code = 
tsdbOpenDataFileDataIter(pWriter->pDataFReader, &pWriter->pDIter); + code = TARRAY2_APPEND(writer->ctx->dataIterArr, iter); TSDB_CHECK_CODE(code, lino, _exit); - if (pWriter->pDIter) { - pWriter->pDIter->next = pWriter->iterList; - pWriter->iterList = pWriter->pDIter; - } - - for (int32_t iStt = 0; iStt < pSet->nSttF; iStt++) { - code = tsdbOpenSttFileDataIter(pWriter->pDataFReader, iStt, &pWriter->pSIter); - TSDB_CHECK_CODE(code, lino, _exit); - - if (pWriter->pSIter) { - code = tsdbDataIterNext2(pWriter->pSIter, NULL); - TSDB_CHECK_CODE(code, lino, _exit); - // add to tree - tRBTreePut(&pWriter->rbt, &pWriter->pSIter->rbtn); + // tome + config.type = TSDB_ITER_TYPE_DATA_TOMB; + config.dataReader = writer->ctx->dataReader; - // add to list - pWriter->pSIter->next = pWriter->iterList; - pWriter->iterList = pWriter->pSIter; - } - } - - pWriter->pSIter = NULL; - } - - // open writer - SDiskID diskId; - if (pSet) { - diskId = pSet->diskId; - } else { - code = tfsAllocDisk(pTsdb->pVnode->pTfs, 0 /*TODO*/, &diskId); - TSDB_CHECK_CODE(code, lino, _exit); - code = tfsMkdirRecurAt(pTsdb->pVnode->pTfs, pTsdb->path, diskId); + code = tsdbIterOpen(&config, &iter); TSDB_CHECK_CODE(code, lino, _exit); - } - SDFileSet wSet = {.diskId = diskId, - .fid = fid, - .pHeadF = &(SHeadFile){.commitID = pWriter->commitID}, - .pDataF = (pSet) ? pSet->pDataF : &(SDataFile){.commitID = pWriter->commitID}, - .pSmaF = (pSet) ? 
pSet->pSmaF : &(SSmaFile){.commitID = pWriter->commitID}, - .nSttF = 1, - .aSttF = {&(SSttFile){.commitID = pWriter->commitID}}}; - code = tsdbDataFWriterOpen(&pWriter->pDataFWriter, pTsdb, &wSet); - TSDB_CHECK_CODE(code, lino, _exit); - if (pWriter->aBlockIdx) { - taosArrayClear(pWriter->aBlockIdx); - } else if ((pWriter->aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx))) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; + code = TARRAY2_APPEND(writer->ctx->tombIterArr, iter); TSDB_CHECK_CODE(code, lino, _exit); } - tMapDataReset(&pWriter->mDataBlk); + // stt iter + SSttFileReader* sttFileReader; + TARRAY2_FOREACH(writer->ctx->sttReaderArr, sttFileReader) { + STsdbIter* iter; + STsdbIterConfig config = {0}; - if (pWriter->aSttBlk) { - taosArrayClear(pWriter->aSttBlk); - } else if ((pWriter->aSttBlk = taosArrayInit(0, sizeof(SSttBlk))) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; + // data + config.type = TSDB_ITER_TYPE_STT; + config.sttReader = sttFileReader; + + code = tsdbIterOpen(&config, &iter); TSDB_CHECK_CODE(code, lino, _exit); - } - tBlockDataReset(&pWriter->bData); - tBlockDataReset(&pWriter->sData); + code = TARRAY2_APPEND(writer->ctx->dataIterArr, iter); + TSDB_CHECK_CODE(code, lino, _exit); -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s, fid:%d", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code), - fid); - } else { - tsdbDebug("vgId:%d %s done, fid:%d", TD_VID(pTsdb->pVnode), __func__, fid); - } - return code; -} + // tomb + config.type = TSDB_ITER_TYPE_STT_TOMB; + config.sttReader = sttFileReader; -static int32_t tsdbSnapWriteTableData(STsdbSnapWriter* pWriter, SRowInfo* pRowInfo) { - int32_t code = 0; - int32_t lino = 0; - - // switch to new table if need - if (pRowInfo == NULL || pRowInfo->uid != pWriter->tbid.uid) { - if (pWriter->tbid.uid) { - code = tsdbSnapWriteTableDataEnd(pWriter); - TSDB_CHECK_CODE(code, lino, _exit); - } + code = tsdbIterOpen(&config, &iter); + TSDB_CHECK_CODE(code, lino, _exit); - code = 
tsdbSnapWriteTableDataStart(pWriter, (TABLEID*)pRowInfo); + code = TARRAY2_APPEND(writer->ctx->tombIterArr, iter); TSDB_CHECK_CODE(code, lino, _exit); } - if (pRowInfo == NULL) goto _exit; + // open merger + code = tsdbIterMergerOpen(writer->ctx->dataIterArr, &writer->ctx->dataIterMerger, false); + TSDB_CHECK_CODE(code, lino, _exit); - code = tsdbSnapWriteTableRow(pWriter, &pRowInfo->row); + code = tsdbIterMergerOpen(writer->ctx->tombIterArr, &writer->ctx->tombIterMerger, true); TSDB_CHECK_CODE(code, lino, _exit); _exit: if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + TSDB_ERROR_LOG(TD_VID(writer->tsdb->pVnode), lino, code); } return code; } -static int32_t tsdbSnapWriteNextRow(STsdbSnapWriter* pWriter, SRowInfo** ppRowInfo) { - int32_t code = 0; - int32_t lino = 0; - - if (pWriter->pSIter) { - code = tsdbDataIterNext2(pWriter->pSIter, NULL); - TSDB_CHECK_CODE(code, lino, _exit); - - if (pWriter->pSIter->rowInfo.suid == 0 && pWriter->pSIter->rowInfo.uid == 0) { - pWriter->pSIter = NULL; - } else { - SRBTreeNode* pNode = tRBTreeMin(&pWriter->rbt); - if (pNode) { - int32_t c = tsdbDataIterCmprFn(&pWriter->pSIter->rbtn, pNode); - if (c > 0) { - tRBTreePut(&pWriter->rbt, &pWriter->pSIter->rbtn); - pWriter->pSIter = NULL; - } else if (c == 0) { - ASSERT(0); - } - } - } - } - - if (pWriter->pSIter == NULL) { - SRBTreeNode* pNode = tRBTreeMin(&pWriter->rbt); - if (pNode) { - tRBTreeDrop(&pWriter->rbt, pNode); - pWriter->pSIter = TSDB_RBTN_TO_DATA_ITER(pNode); - } - } - - if (ppRowInfo) { - if (pWriter->pSIter) { - *ppRowInfo = &pWriter->pSIter->rowInfo; - } else { - *ppRowInfo = NULL; - } - } - -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); - } - return code; +static int32_t tsdbSnapWriteFileSetCloseIter(STsdbSnapWriter* writer) { + tsdbIterMergerClose(&writer->ctx->dataIterMerger); + 
tsdbIterMergerClose(&writer->ctx->tombIterMerger); + TARRAY2_CLEAR(writer->ctx->dataIterArr, tsdbIterClose); + TARRAY2_CLEAR(writer->ctx->tombIterArr, tsdbIterClose); + return 0; } -static int32_t tsdbSnapWriteGetRow(STsdbSnapWriter* pWriter, SRowInfo** ppRowInfo) { +static int32_t tsdbSnapWriteFileSetOpenWriter(STsdbSnapWriter* writer) { int32_t code = 0; int32_t lino = 0; - if (pWriter->pSIter) { - *ppRowInfo = &pWriter->pSIter->rowInfo; - goto _exit; - } - - code = tsdbSnapWriteNextRow(pWriter, ppRowInfo); + SFSetWriterConfig config = { + .tsdb = writer->tsdb, + .toSttOnly = false, + .compactVersion = writer->compactVersion, + .minRow = writer->minRow, + .maxRow = writer->maxRow, + .szPage = writer->szPage, + .cmprAlg = writer->cmprAlg, + .fid = writer->ctx->fid, + .cid = writer->commitID, + .did = writer->ctx->did, + .level = 0, + }; + + code = tsdbFSetWriterOpen(&config, &writer->ctx->fsetWriter); TSDB_CHECK_CODE(code, lino, _exit); _exit: if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + TSDB_ERROR_LOG(TD_VID(writer->tsdb->pVnode), lino, code); } return code; } -static int32_t tsdbSnapWriteFileDataEnd(STsdbSnapWriter* pWriter) { +static int32_t tsdbSnapWriteFileSetCloseWriter(STsdbSnapWriter* writer) { + return tsdbFSetWriterClose(&writer->ctx->fsetWriter, 0, writer->fopArr); +} + +static int32_t tsdbSnapWriteFileSetBegin(STsdbSnapWriter* writer, int32_t fid) { int32_t code = 0; int32_t lino = 0; - ASSERT(pWriter->pDataFWriter); + ASSERT(writer->ctx->fsetWriteBegin == false); - // consume remain data and end with a NULL table row - SRowInfo* pRowInfo; - code = tsdbSnapWriteGetRow(pWriter, &pRowInfo); - TSDB_CHECK_CODE(code, lino, _exit); - for (;;) { - code = tsdbSnapWriteTableData(pWriter, pRowInfo); - TSDB_CHECK_CODE(code, lino, _exit); + STFileSet* fset = &(STFileSet){.fid = fid}; - if (pRowInfo == NULL) break; + writer->ctx->fid = fid; + STFileSet** fsetPtr = 
TARRAY2_SEARCH(writer->fsetArr, &fset, tsdbTFileSetCmprFn, TD_EQ); + writer->ctx->fset = (fsetPtr == NULL) ? NULL : *fsetPtr; - code = tsdbSnapWriteNextRow(pWriter, &pRowInfo); + int32_t level = tsdbFidLevel(fid, &writer->tsdb->keepCfg, taosGetTimestampSec()); + if (tfsAllocDisk(writer->tsdb->pVnode->pTfs, level, &writer->ctx->did)) { + code = TSDB_CODE_NO_AVAIL_DISK; TSDB_CHECK_CODE(code, lino, _exit); } + tfsMkdirRecurAt(writer->tsdb->pVnode->pTfs, writer->tsdb->path, writer->ctx->did); - // do file-level updates - code = tsdbWriteSttBlk(pWriter->pDataFWriter, pWriter->aSttBlk); - TSDB_CHECK_CODE(code, lino, _exit); - - code = tsdbWriteBlockIdx(pWriter->pDataFWriter, pWriter->aBlockIdx); - TSDB_CHECK_CODE(code, lino, _exit); + writer->ctx->hasData = true; + writer->ctx->hasTomb = true; - code = tsdbUpdateDFileSetHeader(pWriter->pDataFWriter); + code = tsdbSnapWriteFileSetOpenReader(writer); TSDB_CHECK_CODE(code, lino, _exit); - code = tsdbFSUpsertFSet(&pWriter->fs, &pWriter->pDataFWriter->wSet); + code = tsdbSnapWriteFileSetOpenIter(writer); TSDB_CHECK_CODE(code, lino, _exit); - code = tsdbDataFWriterClose(&pWriter->pDataFWriter, 1); + code = tsdbSnapWriteFileSetOpenWriter(writer); TSDB_CHECK_CODE(code, lino, _exit); - if (pWriter->pDataFReader) { - code = tsdbDataFReaderClose(&pWriter->pDataFReader); - TSDB_CHECK_CODE(code, lino, _exit); - } - - // clear sources - while (pWriter->iterList) { - STsdbDataIter2* pIter = pWriter->iterList; - pWriter->iterList = pIter->next; - tsdbCloseDataIter2(pIter); - } + writer->ctx->fsetWriteBegin = true; _exit: if (code) { - tsdbError("vgId:%d %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code)); - } else { - tsdbDebug("vgId:%d %s is done", TD_VID(pWriter->pTsdb->pVnode), __func__); + TSDB_ERROR_LOG(TD_VID(writer->tsdb->pVnode), lino, code); } return code; } -static int32_t tsdbSnapWriteTimeSeriesData(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr) { +static int32_t 
tsdbSnapWriteTombRecord(STsdbSnapWriter* writer, const STombRecord* record) { int32_t code = 0; int32_t lino = 0; - code = tDecmprBlockData(pHdr->data, pHdr->size, &pWriter->inData, pWriter->aBuf); - TSDB_CHECK_CODE(code, lino, _exit); - - ASSERT(pWriter->inData.nRow > 0); + while (writer->ctx->hasTomb) { + STombRecord* record1 = tsdbIterMergerGetTombRecord(writer->ctx->tombIterMerger); + if (record1 == NULL) { + writer->ctx->hasTomb = false; + break; + } - // switch to new data file if need - int32_t fid = tsdbKeyFid(pWriter->inData.aTSKEY[0], pWriter->minutes, pWriter->precision); - if (pWriter->fid != fid) { - if (pWriter->pDataFWriter) { - code = tsdbSnapWriteFileDataEnd(pWriter); + int32_t c = tTombRecordCompare(record1, record); + if (c <= 0) { + code = tsdbFSetWriteTombRecord(writer->ctx->fsetWriter, record1); TSDB_CHECK_CODE(code, lino, _exit); + } else { + break; } + } - code = tsdbSnapWriteFileDataStart(pWriter, fid); - TSDB_CHECK_CODE(code, lino, _exit); + if (record->suid == INT64_MAX) { + ASSERT(writer->ctx->hasTomb == false); + goto _exit; } - // loop write each row - SRowInfo* pRowInfo; - code = tsdbSnapWriteGetRow(pWriter, &pRowInfo); + code = tsdbFSetWriteTombRecord(writer->ctx->fsetWriter, record); TSDB_CHECK_CODE(code, lino, _exit); - for (int32_t iRow = 0; iRow < pWriter->inData.nRow; ++iRow) { - SRowInfo rInfo = {.suid = pWriter->inData.suid, - .uid = pWriter->inData.uid ? 
pWriter->inData.uid : pWriter->inData.aUid[iRow], - .row = tsdbRowFromBlockData(&pWriter->inData, iRow)}; - - for (;;) { - if (pRowInfo == NULL) { - code = tsdbSnapWriteTableData(pWriter, &rInfo); - TSDB_CHECK_CODE(code, lino, _exit); - break; - } else { - int32_t c = tRowInfoCmprFn(&rInfo, pRowInfo); - if (c < 0) { - code = tsdbSnapWriteTableData(pWriter, &rInfo); - TSDB_CHECK_CODE(code, lino, _exit); - break; - } else if (c > 0) { - code = tsdbSnapWriteTableData(pWriter, pRowInfo); - TSDB_CHECK_CODE(code, lino, _exit); - - code = tsdbSnapWriteNextRow(pWriter, &pRowInfo); - TSDB_CHECK_CODE(code, lino, _exit); - } else { - ASSERT(0); - } - } - } - } _exit: if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); - } else { - tsdbDebug("vgId:%d %s done, suid:%" PRId64 " uid:%" PRId64 " nRow:%d", TD_VID(pWriter->pTsdb->pVnode), __func__, - pWriter->inData.suid, pWriter->inData.uid, pWriter->inData.nRow); + TSDB_ERROR_LOG(TD_VID(writer->tsdb->pVnode), lino, code); } return code; } -// SNAP_DATA_DEL -static int32_t tsdbSnapWriteDelTableDataStart(STsdbSnapWriter* pWriter, TABLEID* pId) { +static int32_t tsdbSnapWriteFileSetEnd(STsdbSnapWriter* writer) { + if (!writer->ctx->fsetWriteBegin) return 0; + int32_t code = 0; int32_t lino = 0; - if (pId) { - pWriter->tbid = *pId; - } else { - pWriter->tbid = (TABLEID){.suid = INT64_MAX, .uid = INT64_MAX}; - } - - taosArrayClear(pWriter->aDelData); + SRowInfo row = { + .suid = INT64_MAX, + .uid = INT64_MAX, + }; - if (pWriter->pTIter) { - while (pWriter->pTIter->tIter.iDelIdx < taosArrayGetSize(pWriter->pTIter->tIter.aDelIdx)) { - SDelIdx* pDelIdx = taosArrayGet(pWriter->pTIter->tIter.aDelIdx, pWriter->pTIter->tIter.iDelIdx); + code = tsdbSnapWriteTimeSeriesRow(writer, &row); + TSDB_CHECK_CODE(code, lino, _exit); - int32_t c = tTABLEIDCmprFn(pDelIdx, &pWriter->tbid); - if (c < 0) { - code = tsdbReadDelDatav1(pWriter->pDelFReader, pDelIdx, 
pWriter->pTIter->tIter.aDelData, INT64_MAX); - TSDB_CHECK_CODE(code, lino, _exit); + STombRecord record = { + .suid = INT64_MAX, + .uid = INT64_MAX, + }; - SDelIdx* pDelIdxNew = taosArrayReserve(pWriter->aDelIdx, 1); - if (pDelIdxNew == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } + code = tsdbSnapWriteTombRecord(writer, &record); + TSDB_CHECK_CODE(code, lino, _exit); - pDelIdxNew->suid = pDelIdx->suid; - pDelIdxNew->uid = pDelIdx->uid; + // close write + code = tsdbSnapWriteFileSetCloseWriter(writer); + TSDB_CHECK_CODE(code, lino, _exit); - code = tsdbWriteDelData(pWriter->pDelFWriter, pWriter->pTIter->tIter.aDelData, pDelIdxNew); - TSDB_CHECK_CODE(code, lino, _exit); + code = tsdbSnapWriteFileSetCloseIter(writer); + TSDB_CHECK_CODE(code, lino, _exit); - pWriter->pTIter->tIter.iDelIdx++; - } else if (c == 0) { - code = tsdbReadDelDatav1(pWriter->pDelFReader, pDelIdx, pWriter->aDelData, INT64_MAX); - TSDB_CHECK_CODE(code, lino, _exit); + code = tsdbSnapWriteFileSetCloseReader(writer); + TSDB_CHECK_CODE(code, lino, _exit); - pWriter->pTIter->tIter.iDelIdx++; - break; - } else { - break; - } - } - } + writer->ctx->fsetWriteBegin = false; _exit: if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); - } else { - tsdbTrace("vgId:%d %s done, suid:%" PRId64 " uid:%" PRId64, TD_VID(pWriter->pTsdb->pVnode), __func__, - pWriter->tbid.suid, pWriter->tbid.uid); + TSDB_ERROR_LOG(TD_VID(writer->tsdb->pVnode), lino, code); } return code; } -static int32_t tsdbSnapWriteDelTableDataEnd(STsdbSnapWriter* pWriter) { +static int32_t tsdbSnapWriteTimeSeriesData(STsdbSnapWriter* writer, SSnapDataHdr* hdr) { int32_t code = 0; int32_t lino = 0; - if (taosArrayGetSize(pWriter->aDelData) > 0) { - SDelIdx* pDelIdx = taosArrayReserve(pWriter->aDelIdx, 1); - if (pDelIdx == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } + SBlockData blockData[1] 
= {0}; - pDelIdx->suid = pWriter->tbid.suid; - pDelIdx->uid = pWriter->tbid.uid; + code = tDecmprBlockData(hdr->data, hdr->size - sizeof(*hdr), blockData, writer->aBuf); + TSDB_CHECK_CODE(code, lino, _exit); - code = tsdbWriteDelData(pWriter->pDelFWriter, pWriter->aDelData, pDelIdx); + int32_t fid = tsdbKeyFid(blockData->aTSKEY[0], writer->minutes, writer->precision); + if (!writer->ctx->fsetWriteBegin || fid != writer->ctx->fid) { + code = tsdbSnapWriteFileSetEnd(writer); TSDB_CHECK_CODE(code, lino, _exit); - } -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); - } else { - tsdbTrace("vgId:%d %s done", TD_VID(pWriter->pTsdb->pVnode), __func__); - } - return code; -} - -static int32_t tsdbSnapWriteDelTableData(STsdbSnapWriter* pWriter, TABLEID* pId, uint8_t* pData, int64_t size) { - int32_t code = 0; - int32_t lino = 0; - - if (pId == NULL || pId->uid != pWriter->tbid.uid) { - if (pWriter->tbid.uid) { - code = tsdbSnapWriteDelTableDataEnd(pWriter); - TSDB_CHECK_CODE(code, lino, _exit); - } - - code = tsdbSnapWriteDelTableDataStart(pWriter, pId); + code = tsdbSnapWriteFileSetBegin(writer, fid); TSDB_CHECK_CODE(code, lino, _exit); } - if (pId == NULL) goto _exit; - - int64_t n = 0; - while (n < size) { - SDelData delData; - n += tGetDelData(pData + n, &delData); + for (int32_t i = 0; i < blockData->nRow; ++i) { + SRowInfo rowInfo = { + .suid = blockData->suid, + .uid = blockData->uid ? 
blockData->uid : blockData->aUid[i], + .row = tsdbRowFromBlockData(blockData, i), + }; - if (taosArrayPush(pWriter->aDelData, &delData) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } + code = tsdbSnapWriteTimeSeriesRow(writer, &rowInfo); + TSDB_CHECK_CODE(code, lino, _exit); } - ASSERT(n == size); _exit: if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + TSDB_ERROR_LOG(TD_VID(writer->tsdb->pVnode), lino, code); + } else { + tsdbDebug("vgId:%d %s done, suid:%" PRId64 " uid:%" PRId64 " nRow:%d", TD_VID(writer->tsdb->pVnode), __func__, + blockData->suid, blockData->uid, blockData->nRow); } + tBlockDataDestroy(blockData); return code; } -static int32_t tsdbSnapWriteDelDataStart(STsdbSnapWriter* pWriter) { +static int32_t tsdbSnapWriteDecmprTombBlock(SSnapDataHdr* hdr, STombBlock* tombBlock) { int32_t code = 0; int32_t lino = 0; - STsdb* pTsdb = pWriter->pTsdb; - SDelFile* pDelFile = pWriter->fs.pDelFile; - - pWriter->tbid = (TABLEID){0}; + int64_t size = hdr->size - sizeof(*hdr); + ASSERT(size % TOMB_RECORD_ELEM_NUM == 0); + size = size / TOMB_RECORD_ELEM_NUM; + ASSERT(size % sizeof(int64_t) == 0); - // reader - if (pDelFile) { - code = tsdbDelFReaderOpen(&pWriter->pDelFReader, pDelFile, pTsdb); - TSDB_CHECK_CODE(code, lino, _exit); - - code = tsdbOpenTombFileDataIter(pWriter->pDelFReader, &pWriter->pTIter); - TSDB_CHECK_CODE(code, lino, _exit); - } - - // writer - code = tsdbDelFWriterOpen(&pWriter->pDelFWriter, &(SDelFile){.commitID = pWriter->commitID}, pTsdb); - TSDB_CHECK_CODE(code, lino, _exit); - - if ((pWriter->aDelIdx = taosArrayInit(0, sizeof(SDelIdx))) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - if ((pWriter->aDelData = taosArrayInit(0, sizeof(SDelData))) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; + int64_t* data = (int64_t*)hdr->data; + for (int32_t i = 0; i < TOMB_RECORD_ELEM_NUM; ++i) { + 
code = TARRAY2_APPEND_BATCH(&tombBlock->dataArr[i], hdr->data + i * size, size / sizeof(int64_t)); TSDB_CHECK_CODE(code, lino, _exit); } _exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); - } else { - tsdbDebug("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); - } return code; } -static int32_t tsdbSnapWriteDelDataEnd(STsdbSnapWriter* pWriter) { +static int32_t tsdbSnapWriteTombData(STsdbSnapWriter* writer, SSnapDataHdr* hdr) { int32_t code = 0; int32_t lino = 0; - STsdb* pTsdb = pWriter->pTsdb; - - // end remaining table with NULL data - code = tsdbSnapWriteDelTableData(pWriter, NULL, NULL, 0); - TSDB_CHECK_CODE(code, lino, _exit); - - // update file-level info - code = tsdbWriteDelIdx(pWriter->pDelFWriter, pWriter->aDelIdx); - TSDB_CHECK_CODE(code, lino, _exit); - - code = tsdbUpdateDelFileHdr(pWriter->pDelFWriter); - TSDB_CHECK_CODE(code, lino, _exit); + STombRecord record; + STombBlock tombBlock[1] = {0}; - code = tsdbFSUpsertDelFile(&pWriter->fs, &pWriter->pDelFWriter->fDel); + code = tsdbSnapWriteDecmprTombBlock(hdr, tombBlock); TSDB_CHECK_CODE(code, lino, _exit); - code = tsdbDelFWriterClose(&pWriter->pDelFWriter, 1); - TSDB_CHECK_CODE(code, lino, _exit); + tTombBlockGet(tombBlock, 0, &record); + int32_t fid = tsdbKeyFid(record.skey, writer->minutes, writer->precision); + if (!writer->ctx->fsetWriteBegin || fid != writer->ctx->fid) { + code = tsdbSnapWriteFileSetEnd(writer); + TSDB_CHECK_CODE(code, lino, _exit); - if (pWriter->pDelFReader) { - code = tsdbDelFReaderClose(&pWriter->pDelFReader); + code = tsdbSnapWriteFileSetBegin(writer, fid); TSDB_CHECK_CODE(code, lino, _exit); } - if (pWriter->pTIter) { - tsdbCloseDataIter2(pWriter->pTIter); - pWriter->pTIter = NULL; - } + if (writer->ctx->hasData) { + SRowInfo row = { + .suid = INT64_MAX, + .uid = INT64_MAX, + }; -_exit: - if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, 
tstrerror(code)); - } else { - tsdbInfo("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); + code = tsdbSnapWriteTimeSeriesRow(writer, &row); + TSDB_CHECK_CODE(code, lino, _exit); } - return code; -} -static int32_t tsdbSnapWriteDelData(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr) { - int32_t code = 0; - int32_t lino = 0; + ASSERT(writer->ctx->hasData == false); - STsdb* pTsdb = pWriter->pTsdb; + for (int32_t i = 0; i < TOMB_BLOCK_SIZE(tombBlock); ++i) { + tTombBlockGet(tombBlock, i, &record); - // start to write del data if need - if (pWriter->pDelFWriter == NULL) { - code = tsdbSnapWriteDelDataStart(pWriter); + code = tsdbSnapWriteTombRecord(writer, &record); TSDB_CHECK_CODE(code, lino, _exit); } - // do write del data - code = tsdbSnapWriteDelTableData(pWriter, (TABLEID*)pHdr->data, pHdr->data + sizeof(TABLEID), - pHdr->size - sizeof(TABLEID)); - TSDB_CHECK_CODE(code, lino, _exit); + tTombBlockDestroy(tombBlock); _exit: if (code) { - tsdbError("vgId:%d %s failed since %s", TD_VID(pTsdb->pVnode), __func__, tstrerror(code)); - } else { - tsdbTrace("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); + TSDB_ERROR_LOG(TD_VID(writer->tsdb->pVnode), lino, code); } return code; } -// APIs -int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWriter** ppWriter) { +int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWriter** writer) { int32_t code = 0; int32_t lino = 0; - // alloc - STsdbSnapWriter* pWriter = (STsdbSnapWriter*)taosMemoryCalloc(1, sizeof(*pWriter)); - if (pWriter == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - pWriter->pTsdb = pTsdb; - pWriter->sver = sver; - pWriter->ever = ever; - pWriter->minutes = pTsdb->keepCfg.days; - pWriter->precision = pTsdb->keepCfg.precision; - pWriter->minRow = pTsdb->pVnode->config.tsdbCfg.minRows; - pWriter->maxRow = pTsdb->pVnode->config.tsdbCfg.maxRows; - pWriter->cmprAlg = pTsdb->pVnode->config.tsdbCfg.compression; - 
pWriter->commitID = pTsdb->pVnode->state.commitID; - - code = tsdbFSCopy(pTsdb, &pWriter->fs); - TSDB_CHECK_CODE(code, lino, _exit); - - // SNAP_DATA_TSDB - code = tBlockDataCreate(&pWriter->inData); - TSDB_CHECK_CODE(code, lino, _exit); - - pWriter->fid = INT32_MIN; - - code = tBlockDataCreate(&pWriter->bData); - TSDB_CHECK_CODE(code, lino, _exit); - - code = tBlockDataCreate(&pWriter->sData); + writer[0] = taosMemoryCalloc(1, sizeof(*writer[0])); + if (writer[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; + + writer[0]->tsdb = pTsdb; + writer[0]->sver = sver; + writer[0]->ever = ever; + writer[0]->minutes = pTsdb->keepCfg.days; + writer[0]->precision = pTsdb->keepCfg.precision; + writer[0]->minRow = pTsdb->pVnode->config.tsdbCfg.minRows; + writer[0]->maxRow = pTsdb->pVnode->config.tsdbCfg.maxRows; + writer[0]->commitID = tsdbFSAllocEid(pTsdb->pFS); + writer[0]->szPage = pTsdb->pVnode->config.tsdbPageSize; + writer[0]->compactVersion = INT64_MAX; + writer[0]->now = taosGetTimestampMs(); + + code = tsdbFSCreateCopySnapshot(pTsdb->pFS, &writer[0]->fsetArr); TSDB_CHECK_CODE(code, lino, _exit); - // SNAP_DATA_DEL + tsdbFSDisableBgTask(pTsdb->pFS); _exit: if (code) { tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); - if (pWriter) { - tBlockDataDestroy(&pWriter->sData); - tBlockDataDestroy(&pWriter->bData); - tBlockDataDestroy(&pWriter->inData); - tsdbFSDestroy(&pWriter->fs); - taosMemoryFree(pWriter); - pWriter = NULL; - } } else { tsdbInfo("vgId:%d %s done, sver:%" PRId64 " ever:%" PRId64, TD_VID(pTsdb->pVnode), __func__, sver, ever); } - *ppWriter = pWriter; return code; } -int32_t tsdbSnapWriterPrepareClose(STsdbSnapWriter* pWriter) { +int32_t tsdbSnapWriterPrepareClose(STsdbSnapWriter* writer) { int32_t code = 0; int32_t lino = 0; - if (pWriter->pDataFWriter) { - code = tsdbSnapWriteFileDataEnd(pWriter); - TSDB_CHECK_CODE(code, lino, _exit); - } - - if (pWriter->pDelFWriter) { - code = 
tsdbSnapWriteDelDataEnd(pWriter); - TSDB_CHECK_CODE(code, lino, _exit); - } + code = tsdbSnapWriteFileSetEnd(writer); + TSDB_CHECK_CODE(code, lino, _exit); - code = tsdbFSPrepareCommit(pWriter->pTsdb, &pWriter->fs); + code = tsdbFSEditBegin(writer->tsdb->pFS, writer->fopArr, TSDB_FEDIT_COMMIT); TSDB_CHECK_CODE(code, lino, _exit); _exit: if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code)); + TSDB_ERROR_LOG(TD_VID(writer->tsdb->pVnode), lino, code); } else { - tsdbDebug("vgId:%d %s done", TD_VID(pWriter->pTsdb->pVnode), __func__); + tsdbDebug("vgId:%d %s done", TD_VID(writer->tsdb->pVnode), __func__); } return code; } -int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) { +int32_t tsdbSnapWriterClose(STsdbSnapWriter** writer, int8_t rollback) { + if (writer[0] == NULL) return 0; + int32_t code = 0; int32_t lino = 0; - STsdbSnapWriter* pWriter = *ppWriter; - STsdb* pTsdb = pWriter->pTsdb; + STsdb* tsdb = writer[0]->tsdb; if (rollback) { - tsdbRollbackCommit(pWriter->pTsdb); + code = tsdbFSEditAbort(writer[0]->tsdb->pFS); + TSDB_CHECK_CODE(code, lino, _exit); } else { - // lock - taosThreadRwlockWrlock(&pTsdb->rwLock); + taosThreadRwlockWrlock(&writer[0]->tsdb->rwLock); - code = tsdbFSCommit(pWriter->pTsdb); + code = tsdbFSEditCommit(writer[0]->tsdb->pFS); if (code) { - taosThreadRwlockUnlock(&pTsdb->rwLock); + taosThreadRwlockUnlock(&writer[0]->tsdb->rwLock); TSDB_CHECK_CODE(code, lino, _exit); } - // unlock - taosThreadRwlockUnlock(&pTsdb->rwLock); + taosThreadRwlockUnlock(&writer[0]->tsdb->rwLock); } + tsdbFSEnableBgTask(tsdb->pFS); + + tsdbIterMergerClose(&writer[0]->ctx->tombIterMerger); + tsdbIterMergerClose(&writer[0]->ctx->dataIterMerger); + TARRAY2_DESTROY(writer[0]->ctx->tombIterArr, tsdbIterClose); + TARRAY2_DESTROY(writer[0]->ctx->dataIterArr, tsdbIterClose); + TARRAY2_DESTROY(writer[0]->ctx->sttReaderArr, tsdbSttFileReaderClose); + 
tsdbDataFileReaderClose(&writer[0]->ctx->dataReader); - // SNAP_DATA_DEL - taosArrayDestroy(pWriter->aDelData); - taosArrayDestroy(pWriter->aDelIdx); - - // SNAP_DATA_TSDB - tBlockDataDestroy(&pWriter->sData); - tBlockDataDestroy(&pWriter->bData); - taosArrayDestroy(pWriter->aSttBlk); - tMapDataClear(&pWriter->mDataBlk); - taosArrayDestroy(pWriter->aBlockIdx); - tDestroyTSchema(pWriter->skmTable.pTSchema); - tBlockDataDestroy(&pWriter->inData); - - for (int32_t iBuf = 0; iBuf < sizeof(pWriter->aBuf) / sizeof(uint8_t*); iBuf++) { - tFree(pWriter->aBuf[iBuf]); + TARRAY2_DESTROY(writer[0]->fopArr, NULL); + tsdbFSDestroyCopySnapshot(&writer[0]->fsetArr); + + for (int32_t i = 0; i < ARRAY_SIZE(writer[0]->aBuf); ++i) { + tFree(writer[0]->aBuf[i]); } - tsdbFSDestroy(&pWriter->fs); - taosMemoryFree(pWriter); - *ppWriter = NULL; + + taosMemoryFree(writer[0]); + writer[0] = NULL; _exit: if (code) { - tsdbError("vgId:%d %s failed at line %d since %s", TD_VID(pTsdb->pVnode), __func__, lino, tstrerror(code)); + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); } else { - tsdbInfo("vgId:%d %s done", TD_VID(pTsdb->pVnode), __func__); + tsdbInfo("vgId:%d %s done", TD_VID(tsdb->pVnode), __func__); } return code; } -int32_t tsdbSnapWrite(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr) { +int32_t tsdbSnapWrite(STsdbSnapWriter* writer, SSnapDataHdr* hdr) { int32_t code = 0; int32_t lino = 0; - if (pHdr->type == SNAP_DATA_TSDB) { - code = tsdbSnapWriteTimeSeriesData(pWriter, pHdr); + if (hdr->type == SNAP_DATA_TSDB) { + code = tsdbSnapWriteTimeSeriesData(writer, hdr); TSDB_CHECK_CODE(code, lino, _exit); - goto _exit; - } else if (pWriter->pDataFWriter) { - code = tsdbSnapWriteFileDataEnd(pWriter); - TSDB_CHECK_CODE(code, lino, _exit); - } - - if (pHdr->type == SNAP_DATA_DEL) { - code = tsdbSnapWriteDelData(pWriter, pHdr); + } else if (hdr->type == SNAP_DATA_DEL) { + code = tsdbSnapWriteTombData(writer, hdr); TSDB_CHECK_CODE(code, lino, _exit); - goto _exit; + } else { + ASSERT(0); } 
_exit: if (code) { tsdbError("vgId:%d %s failed at line %d since %s, type:%d index:%" PRId64 " size:%" PRId64, - TD_VID(pWriter->pTsdb->pVnode), __func__, lino, tstrerror(code), pHdr->type, pHdr->index, pHdr->size); + TD_VID(writer->tsdb->pVnode), __func__, lino, tstrerror(code), hdr->type, hdr->index, hdr->size); } else { - tsdbDebug("vgId:%d %s done, type:%d index:%" PRId64 " size:%" PRId64, TD_VID(pWriter->pTsdb->pVnode), __func__, - pHdr->type, pHdr->index, pHdr->size); + tsdbDebug("vgId:%d %s done, type:%d index:%" PRId64 " size:%" PRId64, TD_VID(writer->tsdb->pVnode), __func__, + hdr->type, hdr->index, hdr->size); } return code; } diff --git a/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c new file mode 100644 index 0000000000000000000000000000000000000000..27fae9dc6ee6892dac60a7b7df3badddb6cfa9d6 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c @@ -0,0 +1,987 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "tsdbSttFileRW.h" + +// SSttFReader ============================================================ +struct SSttFileReader { + SSttFileReaderConfig config[1]; + STsdbFD *fd; + SSttFooter footer[1]; + struct { + bool sttBlkLoaded; + bool statisBlkLoaded; + bool tombBlkLoaded; + } ctx[1]; + TSttBlkArray sttBlkArray[1]; + TStatisBlkArray statisBlkArray[1]; + TTombBlkArray tombBlkArray[1]; + uint8_t *bufArr[5]; +}; + +// SSttFileReader +int32_t tsdbSttFileReaderOpen(const char *fname, const SSttFileReaderConfig *config, SSttFileReader **reader) { + int32_t code = 0; + int32_t lino = 0; + + reader[0] = taosMemoryCalloc(1, sizeof(*reader[0])); + if (reader[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; + + reader[0]->config[0] = config[0]; + if (reader[0]->config->bufArr == NULL) { + reader[0]->config->bufArr = reader[0]->bufArr; + } + + // open file + if (fname) { + code = tsdbOpenFile(fname, config->szPage, TD_FILE_READ, &reader[0]->fd); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + char fname1[TSDB_FILENAME_LEN]; + tsdbTFileName(config->tsdb, config->file, fname1); + code = tsdbOpenFile(fname1, config->szPage, TD_FILE_READ, &reader[0]->fd); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // // open each segment reader + int64_t offset = config->file->size - sizeof(SSttFooter); + ASSERT(offset >= TSDB_FHDR_SIZE); + + code = tsdbReadFile(reader[0]->fd, offset, (uint8_t *)(reader[0]->footer), sizeof(SSttFooter)); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(config->tsdb->pVnode), lino, code); + tsdbSttFileReaderClose(reader); + } + return code; +} + +int32_t tsdbSttFileReaderClose(SSttFileReader **reader) { + if (reader[0]) { + for (int32_t i = 0; i < ARRAY_SIZE(reader[0]->bufArr); ++i) { + tFree(reader[0]->bufArr[i]); + } + tsdbCloseFile(&reader[0]->fd); + TARRAY2_DESTROY(reader[0]->tombBlkArray, NULL); + TARRAY2_DESTROY(reader[0]->statisBlkArray, NULL); + TARRAY2_DESTROY(reader[0]->sttBlkArray, NULL); + 
taosMemoryFree(reader[0]); + reader[0] = NULL; + } + return 0; +} + +// SSttFSegReader +int32_t tsdbSttFileReadStatisBlk(SSttFileReader *reader, const TStatisBlkArray **statisBlkArray) { + if (!reader->ctx->statisBlkLoaded) { + if (reader->footer->statisBlkPtr->size > 0) { + ASSERT(reader->footer->statisBlkPtr->size % sizeof(SStatisBlk) == 0); + + int32_t size = reader->footer->statisBlkPtr->size / sizeof(SStatisBlk); + void *data = taosMemoryMalloc(reader->footer->statisBlkPtr->size); + if (!data) return TSDB_CODE_OUT_OF_MEMORY; + + int32_t code = + tsdbReadFile(reader->fd, reader->footer->statisBlkPtr->offset, data, reader->footer->statisBlkPtr->size); + if (code) { + taosMemoryFree(data); + return code; + } + + TARRAY2_INIT_EX(reader->statisBlkArray, size, size, data); + } else { + TARRAY2_INIT(reader->statisBlkArray); + } + + reader->ctx->statisBlkLoaded = true; + } + + statisBlkArray[0] = reader->statisBlkArray; + return 0; +} + +int32_t tsdbSttFileReadTombBlk(SSttFileReader *reader, const TTombBlkArray **tombBlkArray) { + if (!reader->ctx->tombBlkLoaded) { + if (reader->footer->tombBlkPtr->size > 0) { + ASSERT(reader->footer->tombBlkPtr->size % sizeof(STombBlk) == 0); + + int32_t size = reader->footer->tombBlkPtr->size / sizeof(STombBlk); + void *data = taosMemoryMalloc(reader->footer->tombBlkPtr->size); + if (!data) return TSDB_CODE_OUT_OF_MEMORY; + + int32_t code = + tsdbReadFile(reader->fd, reader->footer->tombBlkPtr->offset, data, reader->footer->tombBlkPtr->size); + if (code) { + taosMemoryFree(data); + return code; + } + + TARRAY2_INIT_EX(reader->tombBlkArray, size, size, data); + } else { + TARRAY2_INIT(reader->tombBlkArray); + } + + reader->ctx->tombBlkLoaded = true; + } + + tombBlkArray[0] = reader->tombBlkArray; + return 0; +} + +int32_t tsdbSttFileReadSttBlk(SSttFileReader *reader, const TSttBlkArray **sttBlkArray) { + if (!reader->ctx->sttBlkLoaded) { + if (reader->footer->sttBlkPtr->size > 0) { + ASSERT(reader->footer->sttBlkPtr->size % 
sizeof(SSttBlk) == 0); + + int32_t size = reader->footer->sttBlkPtr->size / sizeof(SSttBlk); + void *data = taosMemoryMalloc(reader->footer->sttBlkPtr->size); + if (!data) return TSDB_CODE_OUT_OF_MEMORY; + + int32_t code = tsdbReadFile(reader->fd, reader->footer->sttBlkPtr->offset, data, reader->footer->sttBlkPtr->size); + if (code) { + taosMemoryFree(data); + return code; + } + + TARRAY2_INIT_EX(reader->sttBlkArray, size, size, data); + } else { + TARRAY2_INIT(reader->sttBlkArray); + } + + reader->ctx->sttBlkLoaded = true; + } + + sttBlkArray[0] = reader->sttBlkArray; + return 0; +} + +int32_t tsdbSttFileReadBlockData(SSttFileReader *reader, const SSttBlk *sttBlk, SBlockData *bData) { + int32_t code = 0; + int32_t lino = 0; + + code = tRealloc(&reader->config->bufArr[0], sttBlk->bInfo.szBlock); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbReadFile(reader->fd, sttBlk->bInfo.offset, reader->config->bufArr[0], sttBlk->bInfo.szBlock); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tDecmprBlockData(reader->config->bufArr[0], sttBlk->bInfo.szBlock, bData, &reader->config->bufArr[1]); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbSttFileReadBlockDataByColumn(SSttFileReader *reader, const SSttBlk *sttBlk, SBlockData *bData, + STSchema *pTSchema, int16_t cids[], int32_t ncid) { + int32_t code = 0; + int32_t lino = 0; + + TABLEID tbid = {.suid = sttBlk->suid}; + if (tbid.suid == 0) { + tbid.uid = sttBlk->minUid; + } else { + tbid.uid = 0; + } + + code = tBlockDataInit(bData, &tbid, pTSchema, cids, ncid); + TSDB_CHECK_CODE(code, lino, _exit); + + // uid + version + tskey + code = tRealloc(&reader->config->bufArr[0], sttBlk->bInfo.szKey); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbReadFile(reader->fd, sttBlk->bInfo.offset, reader->config->bufArr[0], sttBlk->bInfo.szKey); + TSDB_CHECK_CODE(code, lino, _exit); + + // hdr + SDiskDataHdr 
hdr[1]; + int32_t size = 0; + + size += tGetDiskDataHdr(reader->config->bufArr[0] + size, hdr); + + ASSERT(hdr->delimiter == TSDB_FILE_DLMT); + + bData->nRow = hdr->nRow; + bData->uid = hdr->uid; + + // uid + if (hdr->uid == 0) { + ASSERT(hdr->szUid); + code = tsdbDecmprData(reader->config->bufArr[0] + size, hdr->szUid, TSDB_DATA_TYPE_BIGINT, hdr->cmprAlg, + (uint8_t **)&bData->aUid, sizeof(int64_t) * hdr->nRow, &reader->config->bufArr[1]); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + ASSERT(hdr->szUid == 0); + } + size += hdr->szUid; + + // version + code = tsdbDecmprData(reader->config->bufArr[0] + size, hdr->szVer, TSDB_DATA_TYPE_BIGINT, hdr->cmprAlg, + (uint8_t **)&bData->aVersion, sizeof(int64_t) * hdr->nRow, &reader->config->bufArr[1]); + TSDB_CHECK_CODE(code, lino, _exit); + size += hdr->szVer; + + // ts + code = tsdbDecmprData(reader->config->bufArr[0] + size, hdr->szKey, TSDB_DATA_TYPE_TIMESTAMP, hdr->cmprAlg, + (uint8_t **)&bData->aTSKEY, sizeof(TSKEY) * hdr->nRow, &reader->config->bufArr[1]); + TSDB_CHECK_CODE(code, lino, _exit); + size += hdr->szKey; + + ASSERT(size == sttBlk->bInfo.szKey); + + // other columns + if (bData->nColData > 0) { + if (hdr->szBlkCol > 0) { + code = tRealloc(&reader->config->bufArr[0], hdr->szBlkCol); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbReadFile(reader->fd, sttBlk->bInfo.offset + sttBlk->bInfo.szKey, reader->config->bufArr[0], + hdr->szBlkCol); + TSDB_CHECK_CODE(code, lino, _exit); + } + + SBlockCol bc[1] = {{.cid = 0}}; + SBlockCol *blockCol = bc; + + size = 0; + for (int32_t i = 0; i < bData->nColData; i++) { + SColData *colData = tBlockDataGetColDataByIdx(bData, i); + + while (blockCol && blockCol->cid < colData->cid) { + if (size < hdr->szBlkCol) { + size += tGetBlockCol(reader->config->bufArr[0] + size, blockCol); + } else { + ASSERT(size == hdr->szBlkCol); + blockCol = NULL; + } + } + + if (blockCol == NULL || blockCol->cid > colData->cid) { + for (int32_t iRow = 0; iRow < hdr->nRow; iRow++) { + 
code = tColDataAppendValue(colData, &COL_VAL_NONE(colData->cid, colData->type)); + TSDB_CHECK_CODE(code, lino, _exit); + } + } else { + ASSERT(blockCol->type == colData->type); + ASSERT(blockCol->flag && blockCol->flag != HAS_NONE); + + if (blockCol->flag == HAS_NULL) { + for (int32_t iRow = 0; iRow < hdr->nRow; iRow++) { + code = tColDataAppendValue(colData, &COL_VAL_NULL(blockCol->cid, blockCol->type)); + TSDB_CHECK_CODE(code, lino, _exit); + } + } else { + int32_t size1 = blockCol->szBitmap + blockCol->szOffset + blockCol->szValue; + + code = tRealloc(&reader->config->bufArr[1], size1); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbReadFile(reader->fd, sttBlk->bInfo.offset + sttBlk->bInfo.szKey + hdr->szBlkCol + blockCol->offset, + reader->config->bufArr[1], size1); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDecmprColData(reader->config->bufArr[1], blockCol, hdr->cmprAlg, hdr->nRow, colData, + &reader->config->bufArr[2]); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + } + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbSttFileReadTombBlock(SSttFileReader *reader, const STombBlk *tombBlk, STombBlock *tombBlock) { + int32_t code = 0; + int32_t lino = 0; + + code = tRealloc(&reader->config->bufArr[0], tombBlk->dp->size); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbReadFile(reader->fd, tombBlk->dp->offset, reader->config->bufArr[0], tombBlk->dp->size); + if (code) TSDB_CHECK_CODE(code, lino, _exit); + + int64_t size = 0; + tTombBlockClear(tombBlock); + for (int32_t i = 0; i < ARRAY_SIZE(tombBlock->dataArr); ++i) { + code = tsdbDecmprData(reader->config->bufArr[0] + size, tombBlk->size[i], TSDB_DATA_TYPE_BIGINT, tombBlk->cmprAlg, + &reader->config->bufArr[1], sizeof(int64_t) * tombBlk->numRec, &reader->config->bufArr[2]); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND_BATCH(&tombBlock->dataArr[i], reader->config->bufArr[1], tombBlk->numRec); 
+ TSDB_CHECK_CODE(code, lino, _exit); + + size += tombBlk->size[i]; + } + + ASSERT(size == tombBlk->dp->size); +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbSttFileReadStatisBlock(SSttFileReader *reader, const SStatisBlk *statisBlk, STbStatisBlock *statisBlock) { + int32_t code = 0; + int32_t lino = 0; + + code = tRealloc(&reader->config->bufArr[0], statisBlk->dp->size); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbReadFile(reader->fd, statisBlk->dp->offset, reader->config->bufArr[0], statisBlk->dp->size); + TSDB_CHECK_CODE(code, lino, _exit); + + int64_t size = 0; + tStatisBlockClear(statisBlock); + for (int32_t i = 0; i < ARRAY_SIZE(statisBlock->dataArr); ++i) { + code = + tsdbDecmprData(reader->config->bufArr[0] + size, statisBlk->size[i], TSDB_DATA_TYPE_BIGINT, statisBlk->cmprAlg, + &reader->config->bufArr[1], sizeof(int64_t) * statisBlk->numRec, &reader->config->bufArr[2]); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND_BATCH(statisBlock->dataArr + i, reader->config->bufArr[1], statisBlk->numRec); + TSDB_CHECK_CODE(code, lino, _exit); + + size += statisBlk->size[i]; + } + + ASSERT(size == statisBlk->dp->size); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(reader->config->tsdb->pVnode), lino, code); + } + return code; +} + +// SSttFWriter ============================================================ +struct SSttFileWriter { + SSttFileWriterConfig config[1]; + struct { + bool opened; + TABLEID tbid[1]; + } ctx[1]; + // file + STsdbFD *fd; + STFile file[1]; + // data + SSttFooter footer[1]; + TTombBlkArray tombBlkArray[1]; + TSttBlkArray sttBlkArray[1]; + TStatisBlkArray statisBlkArray[1]; + STombBlock tombBlock[1]; + STbStatisBlock staticBlock[1]; + SBlockData blockData[1]; + // helper data + SSkmInfo skmTb[1]; + SSkmInfo skmRow[1]; + uint8_t *bufArr[5]; +}; + +int32_t tsdbFileDoWriteBlockData(STsdbFD *fd, SBlockData *blockData, int8_t cmprAlg, int64_t 
*fileSize, + TSttBlkArray *sttBlkArray, uint8_t **bufArr) { + if (blockData->nRow == 0) return 0; + + int32_t code = 0; + + SSttBlk sttBlk[1] = {{ + .suid = blockData->suid, + .minUid = blockData->uid ? blockData->uid : blockData->aUid[0], + .maxUid = blockData->uid ? blockData->uid : blockData->aUid[blockData->nRow - 1], + .minKey = blockData->aTSKEY[0], + .maxKey = blockData->aTSKEY[0], + .minVer = blockData->aVersion[0], + .maxVer = blockData->aVersion[0], + .nRow = blockData->nRow, + }}; + + for (int32_t iRow = 1; iRow < blockData->nRow; iRow++) { + if (sttBlk->minKey > blockData->aTSKEY[iRow]) sttBlk->minKey = blockData->aTSKEY[iRow]; + if (sttBlk->maxKey < blockData->aTSKEY[iRow]) sttBlk->maxKey = blockData->aTSKEY[iRow]; + if (sttBlk->minVer > blockData->aVersion[iRow]) sttBlk->minVer = blockData->aVersion[iRow]; + if (sttBlk->maxVer < blockData->aVersion[iRow]) sttBlk->maxVer = blockData->aVersion[iRow]; + } + + int32_t sizeArr[5] = {0}; + code = tCmprBlockData(blockData, cmprAlg, NULL, NULL, bufArr, sizeArr); + if (code) return code; + + sttBlk->bInfo.offset = *fileSize; + sttBlk->bInfo.szKey = sizeArr[2] + sizeArr[3]; + sttBlk->bInfo.szBlock = sizeArr[0] + sizeArr[1] + sttBlk->bInfo.szKey; + + for (int32_t i = 3; i >= 0; i--) { + if (sizeArr[i]) { + code = tsdbWriteFile(fd, *fileSize, bufArr[i], sizeArr[i]); + if (code) return code; + *fileSize += sizeArr[i]; + } + } + + code = TARRAY2_APPEND_PTR(sttBlkArray, sttBlk); + if (code) return code; + + tBlockDataClear(blockData); + + return 0; +} + +static int32_t tsdbSttFileDoWriteBlockData(SSttFileWriter *writer) { + if (writer->blockData->nRow == 0) return 0; + + int32_t code = 0; + int32_t lino = 0; + + code = tsdbFileDoWriteBlockData(writer->fd, writer->blockData, writer->config->cmprAlg, &writer->file->size, + writer->sttBlkArray, writer->config->bufArr); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} 
+ +static int32_t tsdbSttFileDoWriteStatisBlock(SSttFileWriter *writer) { + if (STATIS_BLOCK_SIZE(writer->staticBlock) == 0) return 0; + + int32_t code = 0; + int32_t lino = 0; + + SStatisBlk statisBlk[1] = {{ + .dp[0] = + { + .offset = writer->file->size, + .size = 0, + }, + .minTbid = + { + .suid = TARRAY2_FIRST(writer->staticBlock->suid), + .uid = TARRAY2_FIRST(writer->staticBlock->uid), + }, + .maxTbid = + { + .suid = TARRAY2_LAST(writer->staticBlock->suid), + .uid = TARRAY2_LAST(writer->staticBlock->uid), + }, + .numRec = STATIS_BLOCK_SIZE(writer->staticBlock), + .cmprAlg = writer->config->cmprAlg, + }}; + + for (int32_t i = 0; i < STATIS_RECORD_NUM_ELEM; i++) { + code = tsdbCmprData((uint8_t *)TARRAY2_DATA(writer->staticBlock->dataArr + i), + TARRAY2_DATA_LEN(&writer->staticBlock->dataArr[i]), TSDB_DATA_TYPE_BIGINT, statisBlk->cmprAlg, + &writer->config->bufArr[0], 0, &statisBlk->size[i], &writer->config->bufArr[1]); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbWriteFile(writer->fd, writer->file->size, writer->config->bufArr[0], statisBlk->size[i]); + TSDB_CHECK_CODE(code, lino, _exit); + + statisBlk->dp->size += statisBlk->size[i]; + writer->file->size += statisBlk->size[i]; + } + + code = TARRAY2_APPEND_PTR(writer->statisBlkArray, statisBlk); + TSDB_CHECK_CODE(code, lino, _exit); + + tStatisBlockClear(writer->staticBlock); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbFileWriteTombBlock(STsdbFD *fd, STombBlock *tombBlock, int8_t cmprAlg, int64_t *fileSize, + TTombBlkArray *tombBlkArray, uint8_t **bufArr) { + int32_t code; + + if (TOMB_BLOCK_SIZE(tombBlock) == 0) return 0; + + STombBlk tombBlk[1] = {{ + .dp[0] = + { + .offset = *fileSize, + .size = 0, + }, + .minTbid = + { + .suid = TARRAY2_FIRST(tombBlock->suid), + .uid = TARRAY2_FIRST(tombBlock->uid), + }, + .maxTbid = + { + .suid = TARRAY2_LAST(tombBlock->suid), + .uid = TARRAY2_LAST(tombBlock->uid), + }, + .minVer 
= TARRAY2_FIRST(tombBlock->version), + .maxVer = TARRAY2_FIRST(tombBlock->version), + .numRec = TOMB_BLOCK_SIZE(tombBlock), + .cmprAlg = cmprAlg, + }}; + + for (int32_t i = 1; i < TOMB_BLOCK_SIZE(tombBlock); i++) { + if (tombBlk->minVer > TARRAY2_GET(tombBlock->version, i)) { + tombBlk->minVer = TARRAY2_GET(tombBlock->version, i); + } + if (tombBlk->maxVer < TARRAY2_GET(tombBlock->version, i)) { + tombBlk->maxVer = TARRAY2_GET(tombBlock->version, i); + } + } + + for (int32_t i = 0; i < ARRAY_SIZE(tombBlock->dataArr); i++) { + code = tsdbCmprData((uint8_t *)TARRAY2_DATA(&tombBlock->dataArr[i]), TARRAY2_DATA_LEN(&tombBlock->dataArr[i]), + TSDB_DATA_TYPE_BIGINT, tombBlk->cmprAlg, &bufArr[0], 0, &tombBlk->size[i], &bufArr[1]); + if (code) return code; + + code = tsdbWriteFile(fd, *fileSize, bufArr[0], tombBlk->size[i]); + if (code) return code; + + tombBlk->dp->size += tombBlk->size[i]; + *fileSize += tombBlk->size[i]; + } + + code = TARRAY2_APPEND_PTR(tombBlkArray, tombBlk); + if (code) return code; + + tTombBlockClear(tombBlock); + return 0; +} + +static int32_t tsdbSttFileDoWriteTombBlock(SSttFileWriter *writer) { + if (TOMB_BLOCK_SIZE(writer->tombBlock) == 0) return 0; + + int32_t code = 0; + int32_t lino = 0; + + code = tsdbFileWriteTombBlock(writer->fd, writer->tombBlock, writer->config->cmprAlg, &writer->file->size, + writer->tombBlkArray, writer->config->bufArr); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbFileWriteSttBlk(STsdbFD *fd, const TSttBlkArray *sttBlkArray, SFDataPtr *ptr, int64_t *fileSize) { + ptr->size = TARRAY2_DATA_LEN(sttBlkArray); + if (ptr->size > 0) { + ptr->offset = *fileSize; + + int32_t code = tsdbWriteFile(fd, *fileSize, (const uint8_t *)TARRAY2_DATA(sttBlkArray), ptr->size); + if (code) { + return code; + } + + *fileSize += ptr->size; + } + return 0; +} + +static int32_t tsdbSttFileDoWriteSttBlk(SSttFileWriter 
*writer) { + int32_t code = 0; + int32_t lino; + + code = tsdbFileWriteSttBlk(writer->fd, writer->sttBlkArray, writer->footer->sttBlkPtr, &writer->file->size); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbSttFileDoWriteStatisBlk(SSttFileWriter *writer) { + int32_t code = 0; + int32_t lino; + + writer->footer->statisBlkPtr->size = TARRAY2_DATA_LEN(writer->statisBlkArray); + if (writer->footer->statisBlkPtr->size) { + writer->footer->statisBlkPtr->offset = writer->file->size; + code = tsdbWriteFile(writer->fd, writer->file->size, (const uint8_t *)TARRAY2_DATA(writer->statisBlkArray), + writer->footer->statisBlkPtr->size); + TSDB_CHECK_CODE(code, lino, _exit); + writer->file->size += writer->footer->statisBlkPtr->size; + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbFileWriteTombBlk(STsdbFD *fd, const TTombBlkArray *tombBlkArray, SFDataPtr *ptr, int64_t *fileSize) { + ptr->size = TARRAY2_DATA_LEN(tombBlkArray); + if (ptr->size > 0) { + ptr->offset = *fileSize; + + int32_t code = tsdbWriteFile(fd, *fileSize, (const uint8_t *)TARRAY2_DATA(tombBlkArray), ptr->size); + if (code) { + return code; + } + + *fileSize += ptr->size; + } + return 0; +} + +static int32_t tsdbSttFileDoWriteTombBlk(SSttFileWriter *writer) { + int32_t code = 0; + int32_t lino = 0; + + code = tsdbFileWriteTombBlk(writer->fd, writer->tombBlkArray, writer->footer->tombBlkPtr, &writer->file->size); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbFileWriteSttFooter(STsdbFD *fd, const SSttFooter *footer, int64_t *fileSize) { + int32_t code = tsdbWriteFile(fd, *fileSize, (const uint8_t *)footer, sizeof(*footer)); + if (code) return code; + *fileSize += sizeof(*footer); + return 
0; +} + +static int32_t tsdbSttFileDoWriteFooter(SSttFileWriter *writer) { + return tsdbFileWriteSttFooter(writer->fd, writer->footer, &writer->file->size); +} + +static int32_t tsdbSttFWriterDoOpen(SSttFileWriter *writer) { + int32_t code = 0; + int32_t lino = 0; + + // set + if (!writer->config->skmTb) writer->config->skmTb = writer->skmTb; + if (!writer->config->skmRow) writer->config->skmRow = writer->skmRow; + if (!writer->config->bufArr) writer->config->bufArr = writer->bufArr; + + writer->file[0] = (STFile){ + .type = TSDB_FTYPE_STT, + .did = writer->config->did, + .fid = writer->config->fid, + .cid = writer->config->cid, + .size = 0, + .stt[0] = + { + .level = writer->config->level, + }, + }; + + // open file + int32_t flag = TD_FILE_READ | TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC; + char fname[TSDB_FILENAME_LEN]; + + tsdbTFileName(writer->config->tsdb, writer->file, fname); + code = tsdbOpenFile(fname, writer->config->szPage, flag, &writer->fd); + TSDB_CHECK_CODE(code, lino, _exit); + + uint8_t hdr[TSDB_FHDR_SIZE] = {0}; + code = tsdbWriteFile(writer->fd, 0, hdr, sizeof(hdr)); + TSDB_CHECK_CODE(code, lino, _exit); + writer->file->size += sizeof(hdr); + + writer->ctx->opened = true; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static void tsdbSttFWriterDoClose(SSttFileWriter *writer) { + ASSERT(writer->fd == NULL); + + for (int32_t i = 0; i < ARRAY_SIZE(writer->bufArr); ++i) { + tFree(writer->bufArr[i]); + } + tDestroyTSchema(writer->skmRow->pTSchema); + tDestroyTSchema(writer->skmTb->pTSchema); + tTombBlockDestroy(writer->tombBlock); + tStatisBlockDestroy(writer->staticBlock); + tBlockDataDestroy(writer->blockData); + TARRAY2_DESTROY(writer->tombBlkArray, NULL); + TARRAY2_DESTROY(writer->statisBlkArray, NULL); + TARRAY2_DESTROY(writer->sttBlkArray, NULL); +} + +static int32_t tsdbSttFileDoUpdateHeader(SSttFileWriter *writer) { + // TODO + return 0; +} + +static int32_t 
tsdbSttFWriterCloseCommit(SSttFileWriter *writer, TFileOpArray *opArray) { + int32_t lino; + int32_t code; + + code = tsdbSttFileDoWriteBlockData(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbSttFileDoWriteStatisBlock(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbSttFileDoWriteTombBlock(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbSttFileDoWriteSttBlk(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbSttFileDoWriteStatisBlk(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbSttFileDoWriteTombBlk(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbSttFileDoWriteFooter(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbSttFileDoUpdateHeader(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbFsyncFile(writer->fd); + TSDB_CHECK_CODE(code, lino, _exit); + + tsdbCloseFile(&writer->fd); + + ASSERT(writer->file->size > 0); + STFileOp op = (STFileOp){ + .optype = TSDB_FOP_CREATE, + .fid = writer->config->fid, + .nf = writer->file[0], + }; + + code = TARRAY2_APPEND(opArray, op); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbSttFWriterCloseAbort(SSttFileWriter *writer) { + char fname[TSDB_FILENAME_LEN]; + tsdbTFileName(writer->config->tsdb, writer->file, fname); + tsdbCloseFile(&writer->fd); + taosRemoveFile(fname); + return 0; +} + +int32_t tsdbSttFileWriterOpen(const SSttFileWriterConfig *config, SSttFileWriter **writer) { + writer[0] = taosMemoryCalloc(1, sizeof(*writer[0])); + if (writer[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; + + writer[0]->config[0] = config[0]; + writer[0]->ctx->opened = false; + return 0; +} + +int32_t tsdbSttFileWriterClose(SSttFileWriter **writer, int8_t abort, TFileOpArray *opArray) { + int32_t code = 0; + int32_t lino = 0; + + if (writer[0]->ctx->opened) { + if (abort) { + code = tsdbSttFWriterCloseAbort(writer[0]); 
+ TSDB_CHECK_CODE(code, lino, _exit); + } else { + code = tsdbSttFWriterCloseCommit(writer[0], opArray); + TSDB_CHECK_CODE(code, lino, _exit); + } + tsdbSttFWriterDoClose(writer[0]); + } + taosMemoryFree(writer[0]); + writer[0] = NULL; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer[0]->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbSttFileWriteRow(SSttFileWriter *writer, SRowInfo *row) { + int32_t code = 0; + int32_t lino = 0; + + if (!writer->ctx->opened) { + code = tsdbSttFWriterDoOpen(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (!TABLE_SAME_SCHEMA(row->suid, row->uid, writer->ctx->tbid->suid, writer->ctx->tbid->uid)) { + code = tsdbSttFileDoWriteBlockData(writer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbUpdateSkmTb(writer->config->tsdb, (TABLEID *)row, writer->config->skmTb); + TSDB_CHECK_CODE(code, lino, _exit); + + TABLEID id = {.suid = row->suid, .uid = row->suid ? 0 : row->uid}; + code = tBlockDataInit(writer->blockData, &id, writer->config->skmTb->pTSchema, NULL, 0); + TSDB_CHECK_CODE(code, lino, _exit); + } + + TSDBKEY key[1]; + if (row->row.type == TSDBROW_ROW_FMT) { + key->ts = row->row.pTSRow->ts; + key->version = row->row.version; + } else { + key->ts = row->row.pBlockData->aTSKEY[row->row.iRow]; + key->version = row->row.pBlockData->aVersion[row->row.iRow]; + } + + if (writer->ctx->tbid->uid != row->uid) { + writer->ctx->tbid->suid = row->suid; + writer->ctx->tbid->uid = row->uid; + + if (STATIS_BLOCK_SIZE(writer->staticBlock) >= writer->config->maxRow) { + code = tsdbSttFileDoWriteStatisBlock(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } + + STbStatisRecord record = { + .suid = row->suid, + .uid = row->uid, + .firstKey = key->ts, + .lastKey = key->ts, + .count = 1, + }; + code = tStatisBlockPut(writer->staticBlock, &record); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + ASSERT(key->ts >= TARRAY2_LAST(writer->staticBlock->lastKey)); + + if (key->ts > 
TARRAY2_LAST(writer->staticBlock->lastKey)) { + TARRAY2_LAST(writer->staticBlock->count)++; + TARRAY2_LAST(writer->staticBlock->lastKey) = key->ts; + } + } + + if (row->row.type == TSDBROW_ROW_FMT) { + code = tsdbUpdateSkmRow(writer->config->tsdb, writer->ctx->tbid, // + TSDBROW_SVERSION(&row->row), writer->config->skmRow); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // row to col conversion + if (key->version <= writer->config->compactVersion // + && writer->blockData->nRow > 0 // + && writer->blockData->aTSKEY[writer->blockData->nRow - 1] == key->ts // + && (writer->blockData->uid // + ? writer->blockData->uid // + : writer->blockData->aUid[writer->blockData->nRow - 1]) == row->uid // + ) { + code = tBlockDataUpdateRow(writer->blockData, &row->row, writer->config->skmRow->pTSchema); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + if (writer->blockData->nRow >= writer->config->maxRow) { + code = tsdbSttFileDoWriteBlockData(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tBlockDataAppendRow(writer->blockData, &row->row, writer->config->skmRow->pTSchema, row->uid); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbSttFileWriteBlockData(SSttFileWriter *writer, SBlockData *bdata) { + int32_t code = 0; + int32_t lino = 0; + + SRowInfo row[1]; + row->suid = bdata->suid; + for (int32_t i = 0; i < bdata->nRow; i++) { + row->uid = bdata->uid ? 
bdata->uid : bdata->aUid[i]; + row->row = tsdbRowFromBlockData(bdata, i); + + code = tsdbSttFileWriteRow(writer, row); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } + return code; +} + +int32_t tsdbSttFileWriteTombRecord(SSttFileWriter *writer, const STombRecord *record) { + int32_t code; + int32_t lino; + + if (!writer->ctx->opened) { + code = tsdbSttFWriterDoOpen(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + if (writer->blockData->nRow > 0) { + code = tsdbSttFileDoWriteBlockData(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (STATIS_BLOCK_SIZE(writer->staticBlock) > 0) { + code = tsdbSttFileDoWriteStatisBlock(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + + code = tTombBlockPut(writer->tombBlock, record); + TSDB_CHECK_CODE(code, lino, _exit); + + if (TOMB_BLOCK_SIZE(writer->tombBlock) >= writer->config->maxRow) { + code = tsdbSttFileDoWriteTombBlock(writer); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); + } else { + tsdbTrace("vgId:%d write tomb record to stt file:%s, cid:%" PRId64 ", suid:%" PRId64 ", uid:%" PRId64 + ", version:%" PRId64, + TD_VID(writer->config->tsdb->pVnode), writer->fd->path, writer->config->cid, record->suid, record->uid, + record->version); + } + return code; +} + +bool tsdbSttFileWriterIsOpened(SSttFileWriter *writer) { return writer->ctx->opened; } \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbSttFileRW.h b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.h new file mode 100644 index 0000000000000000000000000000000000000000..242b55795c99be8c416c9238df2cd0356cb89004 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. 
+ * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tsdbFS2.h" +#include "tsdbUtil2.h" + +#ifndef _TSDB_STT_FILE_RW_H +#define _TSDB_STT_FILE_RW_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef TARRAY2(SSttBlk) TSttBlkArray; +typedef TARRAY2(SStatisBlk) TStatisBlkArray; + +typedef struct { + SFDataPtr sttBlkPtr[1]; + SFDataPtr statisBlkPtr[1]; + SFDataPtr tombBlkPtr[1]; + SFDataPtr rsrvd[2]; +} SSttFooter; + +// SSttFileReader ========================================== +typedef struct SSttFileReader SSttFileReader; +typedef struct SSttFileReaderConfig SSttFileReaderConfig; +typedef TARRAY2(SSttFileReader *) TSttFileReaderArray; + +// SSttFileReader +int32_t tsdbSttFileReaderOpen(const char *fname, const SSttFileReaderConfig *config, SSttFileReader **reader); +int32_t tsdbSttFileReaderClose(SSttFileReader **reader); + +// SSttSegReader +int32_t tsdbSttFileReadSttBlk(SSttFileReader *reader, const TSttBlkArray **sttBlkArray); +int32_t tsdbSttFileReadStatisBlk(SSttFileReader *reader, const TStatisBlkArray **statisBlkArray); +int32_t tsdbSttFileReadTombBlk(SSttFileReader *reader, const TTombBlkArray **delBlkArray); + +int32_t tsdbSttFileReadBlockData(SSttFileReader *reader, const SSttBlk *sttBlk, SBlockData *bData); +int32_t tsdbSttFileReadBlockDataByColumn(SSttFileReader *reader, const SSttBlk *sttBlk, SBlockData *bData, + STSchema *pTSchema, int16_t cids[], int32_t ncid); +int32_t tsdbSttFileReadStatisBlock(SSttFileReader *reader, const SStatisBlk *statisBlk, 
STbStatisBlock *sData); +int32_t tsdbSttFileReadTombBlock(SSttFileReader *reader, const STombBlk *delBlk, STombBlock *dData); + +struct SSttFileReaderConfig { + STsdb *tsdb; + int32_t szPage; + STFile file[1]; + uint8_t **bufArr; +}; + +// SSttFileWriter ========================================== +typedef struct SSttFileWriter SSttFileWriter; +typedef struct SSttFileWriterConfig SSttFileWriterConfig; + +int32_t tsdbSttFileWriterOpen(const SSttFileWriterConfig *config, SSttFileWriter **writer); +int32_t tsdbSttFileWriterClose(SSttFileWriter **writer, int8_t abort, TFileOpArray *opArray); +int32_t tsdbSttFileWriteRow(SSttFileWriter *writer, SRowInfo *row); +int32_t tsdbSttFileWriteBlockData(SSttFileWriter *writer, SBlockData *pBlockData); +int32_t tsdbSttFileWriteTombRecord(SSttFileWriter *writer, const STombRecord *record); +bool tsdbSttFileWriterIsOpened(SSttFileWriter *writer); + +struct SSttFileWriterConfig { + STsdb *tsdb; + int32_t maxRow; + int32_t szPage; + int8_t cmprAlg; + int64_t compactVersion; + SDiskID did; + int32_t fid; + int64_t cid; + int32_t level; + SSkmInfo *skmTb; + SSkmInfo *skmRow; + uint8_t **bufArr; +}; + +#ifdef __cplusplus +} +#endif + +#endif /*_TSDB_STT_FILE_RW_H*/ \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbUpgrade.c b/source/dnode/vnode/src/tsdb/tsdbUpgrade.c new file mode 100644 index 0000000000000000000000000000000000000000..59ba51c371c9038c396d67a1799c219628032ab1 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbUpgrade.c @@ -0,0 +1,640 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tsdbUpgrade.h" + +// old +extern void tsdbGetCurrentFName(STsdb *pTsdb, char *current, char *current_t); +extern int32_t tsdbReadDataBlockEx(SDataFReader *pReader, SDataBlk *pDataBlk, SBlockData *pBlockData); + +// new +extern int32_t save_fs(const TFileSetArray *arr, const char *fname); +extern int32_t current_fname(STsdb *pTsdb, char *fname, EFCurrentT ftype); +extern int32_t tsdbFileWriteBrinBlock(STsdbFD *fd, SBrinBlock *brinBlock, int8_t cmprAlg, int64_t *fileSize, + TBrinBlkArray *brinBlkArray, uint8_t **bufArr); +extern int32_t tsdbFileWriteBrinBlk(STsdbFD *fd, TBrinBlkArray *brinBlkArray, SFDataPtr *ptr, int64_t *fileSize); +extern int32_t tsdbFileWriteHeadFooter(STsdbFD *fd, int64_t *fileSize, const SHeadFooter *footer); +extern int32_t tsdbSttLvlInit(int32_t level, SSttLvl **lvl); +extern int32_t tsdbSttLvlClear(SSttLvl **lvl); +extern int32_t tsdbFileWriteSttBlk(STsdbFD *fd, const TSttBlkArray *sttBlkArray, SFDataPtr *ptr, int64_t *fileSize); +extern int32_t tsdbFileWriteSttFooter(STsdbFD *fd, const SSttFooter *footer, int64_t *fileSize); +extern int32_t tsdbFileWriteTombBlock(STsdbFD *fd, STombBlock *tombBlock, int8_t cmprAlg, int64_t *fileSize, + TTombBlkArray *tombBlkArray, uint8_t **bufArr); +extern int32_t tsdbFileWriteTombBlk(STsdbFD *fd, const TTombBlkArray *tombBlkArray, SFDataPtr *ptr, int64_t *fileSize); +extern int32_t tsdbFileWriteTombFooter(STsdbFD *fd, const STombFooter *footer, int64_t *fileSize); + +static int32_t tsdbUpgradeHead(STsdb *tsdb, SDFileSet *pDFileSet, SDataFReader *reader, STFileSet *fset) { + int32_t code = 0; + int32_t lino = 0; + + // init + struct { + // config + int32_t maxRow; + int8_t cmprAlg; + int32_t szPage; + uint8_t *bufArr[8]; + // reader + SArray *aBlockIdx; + SMapData mDataBlk[1]; + SBlockData blockData[1]; + // writer + STsdbFD *fd; + SBrinBlock brinBlock[1]; + 
TBrinBlkArray brinBlkArray[1]; + SHeadFooter footer[1]; + } ctx[1] = {{ + .maxRow = tsdb->pVnode->config.tsdbCfg.maxRows, + .cmprAlg = tsdb->pVnode->config.tsdbCfg.compression, + .szPage = tsdb->pVnode->config.tsdbPageSize, + }}; + + // read SBlockIdx array + if ((ctx->aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx))) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbReadBlockIdx(reader, ctx->aBlockIdx); + TSDB_CHECK_CODE(code, lino, _exit); + + if (taosArrayGetSize(ctx->aBlockIdx) > 0) { + // init/open file fd + STFile file = { + .type = TSDB_FTYPE_HEAD, + .did = pDFileSet->diskId, + .fid = fset->fid, + .cid = pDFileSet->pHeadF->commitID, + .size = pDFileSet->pHeadF->size, + }; + + code = tsdbTFileObjInit(tsdb, &file, &fset->farr[TSDB_FTYPE_HEAD]); + TSDB_CHECK_CODE(code, lino, _exit); + + // open fd + char fname[TSDB_FILENAME_LEN]; + tsdbTFileName(tsdb, &file, fname); + + code = tsdbOpenFile(fname, ctx->szPage, TD_FILE_READ | TD_FILE_WRITE, &ctx->fd); + TSDB_CHECK_CODE(code, lino, _exit); + + // convert + for (int32_t iBlockIdx = 0; iBlockIdx < taosArrayGetSize(ctx->aBlockIdx); ++iBlockIdx) { + SBlockIdx *pBlockIdx = taosArrayGet(ctx->aBlockIdx, iBlockIdx); + + code = tsdbReadDataBlk(reader, pBlockIdx, ctx->mDataBlk); + TSDB_CHECK_CODE(code, lino, _exit); + + for (int32_t iDataBlk = 0; iDataBlk < ctx->mDataBlk->nItem; ++iDataBlk) { + SDataBlk dataBlk[1]; + tMapDataGetItemByIdx(ctx->mDataBlk, iDataBlk, dataBlk, tGetDataBlk); + + SBrinRecord record = { + .suid = pBlockIdx->suid, + .uid = pBlockIdx->uid, + .firstKey = dataBlk->minKey.ts, + .firstKeyVer = dataBlk->minKey.version, + .lastKey = dataBlk->maxKey.ts, + .lastKeyVer = dataBlk->maxKey.version, + .minVer = dataBlk->minVer, + .maxVer = dataBlk->maxVer, + .blockOffset = dataBlk->aSubBlock->offset, + .smaOffset = dataBlk->smaInfo.offset, + .blockSize = dataBlk->aSubBlock->szBlock, + .blockKeySize = dataBlk->aSubBlock->szKey, + .smaSize = dataBlk->smaInfo.size, + 
.numRow = dataBlk->nRow, + .count = dataBlk->nRow, + }; + + if (dataBlk->hasDup) { + code = tsdbReadDataBlockEx(reader, dataBlk, ctx->blockData); + TSDB_CHECK_CODE(code, lino, _exit); + + record.count = 1; + for (int32_t i = 1; i < ctx->blockData->nRow; ++i) { + if (ctx->blockData->aTSKEY[i] != ctx->blockData->aTSKEY[i - 1]) { + record.count++; + } + } + } + + code = tBrinBlockPut(ctx->brinBlock, &record); + TSDB_CHECK_CODE(code, lino, _exit); + + if (BRIN_BLOCK_SIZE(ctx->brinBlock) >= ctx->maxRow) { + code = tsdbFileWriteBrinBlock(ctx->fd, ctx->brinBlock, ctx->cmprAlg, &fset->farr[TSDB_FTYPE_HEAD]->f->size, + ctx->brinBlkArray, ctx->bufArr); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + } + + if (BRIN_BLOCK_SIZE(ctx->brinBlock) > 0) { + code = tsdbFileWriteBrinBlock(ctx->fd, ctx->brinBlock, ctx->cmprAlg, &fset->farr[TSDB_FTYPE_HEAD]->f->size, + ctx->brinBlkArray, ctx->bufArr); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbFileWriteBrinBlk(ctx->fd, ctx->brinBlkArray, ctx->footer->brinBlkPtr, + &fset->farr[TSDB_FTYPE_HEAD]->f->size); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbFileWriteHeadFooter(ctx->fd, &fset->farr[TSDB_FTYPE_HEAD]->f->size, ctx->footer); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbFsyncFile(ctx->fd); + TSDB_CHECK_CODE(code, lino, _exit); + + tsdbCloseFile(&ctx->fd); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } + TARRAY2_DESTROY(ctx->brinBlkArray, NULL); + tBrinBlockDestroy(ctx->brinBlock); + tBlockDataDestroy(ctx->blockData); + tMapDataClear(ctx->mDataBlk); + taosArrayDestroy(ctx->aBlockIdx); + for (int32_t i = 0; i < ARRAY_SIZE(ctx->bufArr); ++i) { + tFree(ctx->bufArr[i]); + } + return code; +} + +static int32_t tsdbUpgradeData(STsdb *tsdb, SDFileSet *pDFileSet, SDataFReader *reader, STFileSet *fset) { + int32_t code = 0; + int32_t lino = 0; + + if (fset->farr[TSDB_FTYPE_HEAD] == NULL) { + return 0; + } + + STFile file = { + .type = TSDB_FTYPE_DATA, + .did = pDFileSet->diskId, 
+ .fid = fset->fid, + .cid = pDFileSet->pDataF->commitID, + .size = pDFileSet->pDataF->size, + }; + + code = tsdbTFileObjInit(tsdb, &file, &fset->farr[TSDB_FTYPE_DATA]); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbUpgradeSma(STsdb *tsdb, SDFileSet *pDFileSet, SDataFReader *reader, STFileSet *fset) { + int32_t code = 0; + int32_t lino = 0; + + if (fset->farr[TSDB_FTYPE_HEAD] == NULL) { + return 0; + } + + STFile file = { + .type = TSDB_FTYPE_SMA, + .did = pDFileSet->diskId, + .fid = fset->fid, + .cid = pDFileSet->pSmaF->commitID, + .size = pDFileSet->pSmaF->size, + }; + + code = tsdbTFileObjInit(tsdb, &file, &fset->farr[TSDB_FTYPE_SMA]); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbUpgradeSttFile(STsdb *tsdb, SDFileSet *pDFileSet, SDataFReader *reader, STFileSet *fset, + int32_t iStt, SSttLvl *lvl) { + int32_t code = 0; + int32_t lino = 0; + + SArray *aSttBlk = taosArrayInit(0, sizeof(SSttBlk)); + if (aSttBlk == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbReadSttBlk(reader, iStt, aSttBlk); + TSDB_CHECK_CODE(code, lino, _exit); + + if (taosArrayGetSize(aSttBlk) > 0) { + SSttFile *pSttF = pDFileSet->aSttF[iStt]; + STFileObj *fobj; + struct { + int32_t szPage; + // writer + STsdbFD *fd; + TSttBlkArray sttBlkArray[1]; + SSttFooter footer[1]; + } ctx[1] = {{ + .szPage = tsdb->pVnode->config.tsdbPageSize, + }}; + + STFile file = { + .type = TSDB_FTYPE_STT, + .did = pDFileSet->diskId, + .fid = fset->fid, + .cid = pSttF->commitID, + .size = pSttF->size, + }; + code = tsdbTFileObjInit(tsdb, &file, &fobj); + TSDB_CHECK_CODE(code, lino, _exit1); + + code = tsdbOpenFile(fobj->fname, ctx->szPage, TD_FILE_READ | TD_FILE_WRITE, &ctx->fd); + TSDB_CHECK_CODE(code, lino, _exit1); + + for (int32_t iSttBlk = 
0; iSttBlk < taosArrayGetSize(aSttBlk); iSttBlk++) { + code = TARRAY2_APPEND_PTR(ctx->sttBlkArray, (SSttBlk *)taosArrayGet(aSttBlk, iSttBlk)); + TSDB_CHECK_CODE(code, lino, _exit1); + } + + code = tsdbFileWriteSttBlk(ctx->fd, ctx->sttBlkArray, ctx->footer->sttBlkPtr, &fobj->f->size); + TSDB_CHECK_CODE(code, lino, _exit1); + + code = tsdbFileWriteSttFooter(ctx->fd, ctx->footer, &fobj->f->size); + TSDB_CHECK_CODE(code, lino, _exit1); + + code = tsdbFsyncFile(ctx->fd); + TSDB_CHECK_CODE(code, lino, _exit1); + + tsdbCloseFile(&ctx->fd); + + code = TARRAY2_APPEND(lvl->fobjArr, fobj); + TSDB_CHECK_CODE(code, lino, _exit1); + + _exit1: + TARRAY2_DESTROY(ctx->sttBlkArray, NULL); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } + taosArrayDestroy(aSttBlk); + return code; +} + +static int32_t tsdbUpgradeStt(STsdb *tsdb, SDFileSet *pDFileSet, SDataFReader *reader, STFileSet *fset) { + int32_t code = 0; + int32_t lino = 0; + + if (pDFileSet->nSttF == 0) { + return 0; + } + + SSttLvl *lvl; + code = tsdbSttLvlInit(0, &lvl); + TSDB_CHECK_CODE(code, lino, _exit); + + for (int32_t iStt = 0; iStt < pDFileSet->nSttF; ++iStt) { + code = tsdbUpgradeSttFile(tsdb, pDFileSet, reader, fset, iStt, lvl); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (TARRAY2_SIZE(lvl->fobjArr) > 0) { + code = TARRAY2_APPEND(fset->lvlArr, lvl); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + tsdbSttLvlClear(&lvl); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbUpgradeFileSet(STsdb *tsdb, SDFileSet *pDFileSet, TFileSetArray *fileSetArray) { + int32_t code = 0; + int32_t lino = 0; + + SDataFReader *reader; + STFileSet *fset; + + code = tsdbTFileSetInit(pDFileSet->fid, &fset); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDataFReaderOpen(&reader, tsdb, pDFileSet); + TSDB_CHECK_CODE(code, lino, _exit); + + // .head + code = tsdbUpgradeHead(tsdb, pDFileSet, reader, fset); + 
TSDB_CHECK_CODE(code, lino, _exit); + + // .data + code = tsdbUpgradeData(tsdb, pDFileSet, reader, fset); + TSDB_CHECK_CODE(code, lino, _exit); + + // .sma + code = tsdbUpgradeSma(tsdb, pDFileSet, reader, fset); + TSDB_CHECK_CODE(code, lino, _exit); + + // .stt + if (pDFileSet->nSttF > 0) { + code = tsdbUpgradeStt(tsdb, pDFileSet, reader, fset); + TSDB_CHECK_CODE(code, lino, _exit); + } + + tsdbDataFReaderClose(&reader); + + code = TARRAY2_APPEND(fileSetArray, fset); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbUpgradeOpenTombFile(STsdb *tsdb, STFileSet *fset, STsdbFD **fd, STFileObj **fobj, bool *toStt) { + int32_t code = 0; + int32_t lino = 0; + + if (TARRAY2_SIZE(fset->lvlArr) == 0) { // to .tomb file + *toStt = false; + + STFile file = { + .type = TSDB_FTYPE_TOMB, + .did = fset->farr[TSDB_FTYPE_HEAD]->f->did, + .fid = fset->fid, + .cid = 0, + .size = 0, + }; + + code = tsdbTFileObjInit(tsdb, &file, fobj); + TSDB_CHECK_CODE(code, lino, _exit); + + fset->farr[TSDB_FTYPE_TOMB] = *fobj; + } else { // to .stt file + *toStt = true; + SSttLvl *lvl = TARRAY2_GET(fset->lvlArr, 0); + + STFile file = { + .type = TSDB_FTYPE_STT, + .did = TARRAY2_GET(lvl->fobjArr, 0)->f->did, + .fid = fset->fid, + .cid = 0, + .size = 0, + }; + + code = tsdbTFileObjInit(tsdb, &file, fobj); + TSDB_CHECK_CODE(code, lino, _exit); + + code = TARRAY2_APPEND(lvl->fobjArr, fobj[0]); + TSDB_CHECK_CODE(code, lino, _exit); + } + + char fname[TSDB_FILENAME_LEN] = {0}; + code = tsdbOpenFile(fobj[0]->fname, tsdb->pVnode->config.tsdbPageSize, + TD_FILE_READ | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_CREATE, fd); + TSDB_CHECK_CODE(code, lino, _exit); + + uint8_t hdr[TSDB_FHDR_SIZE] = {0}; + code = tsdbWriteFile(fd[0], 0, hdr, TSDB_FHDR_SIZE); + TSDB_CHECK_CODE(code, lino, _exit); + fobj[0]->f->size += TSDB_FHDR_SIZE; + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } 
+ return code; +} + +static int32_t tsdbDumpTombDataToFSet(STsdb *tsdb, SDelFReader *reader, SArray *aDelIdx, STFileSet *fset) { + int32_t code = 0; + int32_t lino = 0; + + struct { + // context + bool toStt; + int8_t cmprAlg; + int32_t maxRow; + int64_t minKey; + int64_t maxKey; + uint8_t *bufArr[8]; + // reader + SArray *aDelData; + // writer + STsdbFD *fd; + STFileObj *fobj; + STombBlock tombBlock[1]; + TTombBlkArray tombBlkArray[1]; + STombFooter tombFooter[1]; + SSttFooter sttFooter[1]; + } ctx[1] = {{ + .maxRow = tsdb->pVnode->config.tsdbCfg.maxRows, + .cmprAlg = tsdb->pVnode->config.tsdbCfg.compression, + }}; + + tsdbFidKeyRange(fset->fid, tsdb->keepCfg.days, tsdb->keepCfg.precision, &ctx->minKey, &ctx->maxKey); + + if ((ctx->aDelData = taosArrayInit(0, sizeof(SDelData))) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + for (int32_t iDelIdx = 0; iDelIdx < taosArrayGetSize(aDelIdx); iDelIdx++) { + SDelIdx *pDelIdx = (SDelIdx *)taosArrayGet(aDelIdx, iDelIdx); + + code = tsdbReadDelData(reader, pDelIdx, ctx->aDelData); + TSDB_CHECK_CODE(code, lino, _exit); + + for (int32_t iDelData = 0; iDelData < taosArrayGetSize(ctx->aDelData); iDelData++) { + SDelData *pDelData = (SDelData *)taosArrayGet(ctx->aDelData, iDelData); + + STombRecord record = { + .suid = pDelIdx->suid, + .uid = pDelIdx->uid, + .version = pDelData->version, + .skey = pDelData->sKey, + .ekey = pDelData->eKey, + }; + + code = tTombBlockPut(ctx->tombBlock, &record); + TSDB_CHECK_CODE(code, lino, _exit); + + if (TOMB_BLOCK_SIZE(ctx->tombBlock) > ctx->maxRow) { + if (ctx->fd == NULL) { + code = tsdbUpgradeOpenTombFile(tsdb, fset, &ctx->fd, &ctx->fobj, &ctx->toStt); + TSDB_CHECK_CODE(code, lino, _exit); + } + code = tsdbFileWriteTombBlock(ctx->fd, ctx->tombBlock, ctx->cmprAlg, &ctx->fobj->f->size, ctx->tombBlkArray, + ctx->bufArr); + TSDB_CHECK_CODE(code, lino, _exit); + } + } + } + + if (TOMB_BLOCK_SIZE(ctx->tombBlock) > 0) { + if (ctx->fd == NULL) { + code = 
tsdbUpgradeOpenTombFile(tsdb, fset, &ctx->fd, &ctx->fobj, &ctx->toStt); + TSDB_CHECK_CODE(code, lino, _exit); + } + code = tsdbFileWriteTombBlock(ctx->fd, ctx->tombBlock, ctx->cmprAlg, &ctx->fobj->f->size, ctx->tombBlkArray, + ctx->bufArr); + TSDB_CHECK_CODE(code, lino, _exit); + } + + if (ctx->fd != NULL) { + if (ctx->toStt) { + code = tsdbFileWriteTombBlk(ctx->fd, ctx->tombBlkArray, ctx->sttFooter->tombBlkPtr, &ctx->fobj->f->size); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbFileWriteSttFooter(ctx->fd, ctx->sttFooter, &ctx->fobj->f->size); + TSDB_CHECK_CODE(code, lino, _exit); + } else { + code = tsdbFileWriteTombBlk(ctx->fd, ctx->tombBlkArray, ctx->tombFooter->tombBlkPtr, &ctx->fobj->f->size); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbFileWriteTombFooter(ctx->fd, ctx->tombFooter, &ctx->fobj->f->size); + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbFsyncFile(ctx->fd); + TSDB_CHECK_CODE(code, lino, _exit); + + tsdbCloseFile(&ctx->fd); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } + for (int32_t i = 0; i < ARRAY_SIZE(ctx->bufArr); i++) { + tFree(ctx->bufArr[i]); + } + TARRAY2_DESTROY(ctx->tombBlkArray, NULL); + tTombBlockDestroy(ctx->tombBlock); + taosArrayDestroy(ctx->aDelData); + return code; +} + +static int32_t tsdbUpgradeTombFile(STsdb *tsdb, SDelFile *pDelFile, TFileSetArray *fileSetArray) { + int32_t code = 0; + int32_t lino = 0; + + SDelFReader *reader = NULL; + SArray *aDelIdx = NULL; + + if ((aDelIdx = taosArrayInit(0, sizeof(SDelIdx))) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + TSDB_CHECK_CODE(code, lino, _exit); + } + + code = tsdbDelFReaderOpen(&reader, pDelFile, tsdb); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbReadDelIdx(reader, aDelIdx); + TSDB_CHECK_CODE(code, lino, _exit); + + if (taosArrayGetSize(aDelIdx) > 0) { + STFileSet *fset; + TARRAY2_FOREACH(fileSetArray, fset) { + code = tsdbDumpTombDataToFSet(tsdb, reader, aDelIdx, fset); + TSDB_CHECK_CODE(code, lino, 
_exit); + } + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } + tsdbDelFReaderClose(&reader); + taosArrayDestroy(aDelIdx); + return code; +} + +static int32_t tsdbDoUpgradeFileSystem(STsdb *tsdb, TFileSetArray *fileSetArray) { + int32_t code = 0; + int32_t lino = 0; + + // upgrade each file set + for (int32_t i = 0; i < taosArrayGetSize(tsdb->fs.aDFileSet); i++) { + code = tsdbUpgradeFileSet(tsdb, taosArrayGet(tsdb->fs.aDFileSet, i), fileSetArray); + TSDB_CHECK_CODE(code, lino, _exit); + } + + // upgrade tomb file + if (tsdb->fs.pDelFile != NULL) { + code = tsdbUpgradeTombFile(tsdb, tsdb->fs.pDelFile, fileSetArray); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } + return code; +} + +static int32_t tsdbUpgradeFileSystem(STsdb *tsdb, int8_t rollback) { + int32_t code = 0; + int32_t lino = 0; + + TFileSetArray fileSetArray[1] = {0}; + + // open old file system + code = tsdbFSOpen(tsdb, rollback); + TSDB_CHECK_CODE(code, lino, _exit); + + code = tsdbDoUpgradeFileSystem(tsdb, fileSetArray); + TSDB_CHECK_CODE(code, lino, _exit); + + // close file system + code = tsdbFSClose(tsdb); + TSDB_CHECK_CODE(code, lino, _exit); + + // save new file system + char fname[TSDB_FILENAME_LEN]; + current_fname(tsdb, fname, TSDB_FCURRENT); + code = save_fs(fileSetArray, fname); + TSDB_CHECK_CODE(code, lino, _exit); + +_exit: + if (code) { + TSDB_ERROR_LOG(TD_VID(tsdb->pVnode), lino, code); + } + TARRAY2_DESTROY(fileSetArray, tsdbTFileSetClear); + return code; +} + +int32_t tsdbCheckAndUpgradeFileSystem(STsdb *tsdb, int8_t rollback) { + char fname[TSDB_FILENAME_LEN]; + + tsdbGetCurrentFName(tsdb, fname, NULL); + if (!taosCheckExistFile(fname)) return 0; + + int32_t code = tsdbUpgradeFileSystem(tsdb, rollback); + if (code) return code; + + taosRemoveFile(fname); + return 0; +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbUpgrade.h 
b/source/dnode/vnode/src/tsdb/tsdbUpgrade.h new file mode 100644 index 0000000000000000000000000000000000000000..f9aac94e00026bd833f27df5227d7d351664ff63 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbUpgrade.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tsdb.h" +#include "tsdbDataFileRW.h" +#include "tsdbDef.h" +#include "tsdbFS2.h" +#include "tsdbUtil2.h" + +#ifndef _TSDB_UPGRADE_H_ +#define _TSDB_UPGRADE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +int32_t tsdbCheckAndUpgradeFileSystem(STsdb *tsdb, int8_t rollback); + +#ifdef __cplusplus +} +#endif + +#endif /*_TSDB_UPGRADE_H_*/ \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbUtil2.c b/source/dnode/vnode/src/tsdb/tsdbUtil2.c new file mode 100644 index 0000000000000000000000000000000000000000..e938caa1184f34c366dec638b4a5dd9db9eaafb6 --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbUtil2.c @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tsdbUtil2.h" + +// SDelBlock ---------- +int32_t tTombBlockInit(STombBlock *tombBlock) { + for (int32_t i = 0; i < TOMB_RECORD_ELEM_NUM; ++i) { + TARRAY2_INIT(&tombBlock->dataArr[i]); + } + return 0; +} + +int32_t tTombBlockDestroy(STombBlock *tombBlock) { + for (int32_t i = 0; i < TOMB_RECORD_ELEM_NUM; ++i) { + TARRAY2_DESTROY(&tombBlock->dataArr[i], NULL); + } + return 0; +} + +int32_t tTombBlockClear(STombBlock *tombBlock) { + for (int32_t i = 0; i < TOMB_RECORD_ELEM_NUM; ++i) { + TARRAY2_CLEAR(&tombBlock->dataArr[i], NULL); + } + return 0; +} + +int32_t tTombBlockPut(STombBlock *tombBlock, const STombRecord *record) { + int32_t code; + for (int32_t i = 0; i < TOMB_RECORD_ELEM_NUM; ++i) { + code = TARRAY2_APPEND(&tombBlock->dataArr[i], record->dataArr[i]); + if (code) return code; + } + return 0; +} + +int32_t tTombBlockGet(STombBlock *tombBlock, int32_t idx, STombRecord *record) { + if (idx >= TOMB_BLOCK_SIZE(tombBlock)) return TSDB_CODE_OUT_OF_RANGE; + for (int32_t i = 0; i < TOMB_RECORD_ELEM_NUM; ++i) { + record->dataArr[i] = TARRAY2_GET(&tombBlock->dataArr[i], idx); + } + return 0; +} + +int32_t tTombRecordCompare(const STombRecord *r1, const STombRecord *r2) { + if (r1->suid < r2->suid) return -1; + if (r1->suid > r2->suid) return 1; + if (r1->uid < r2->uid) return -1; + if (r1->uid > r2->uid) return 1; + if (r1->version < r2->version) return -1; + if (r1->version > r2->version) return 1; + return 0; +} + +// STbStatisBlock ---------- +int32_t tStatisBlockInit(STbStatisBlock *statisBlock) { + for (int32_t i = 0; i < STATIS_RECORD_NUM_ELEM; ++i) { + TARRAY2_INIT(&statisBlock->dataArr[i]); + } + return 0; +} + +int32_t tStatisBlockDestroy(STbStatisBlock *statisBlock) { + for (int32_t i = 0; i < STATIS_RECORD_NUM_ELEM; ++i) { + TARRAY2_DESTROY(&statisBlock->dataArr[i], NULL); + } + return 0; +} + +int32_t 
tStatisBlockClear(STbStatisBlock *statisBlock) { + for (int32_t i = 0; i < STATIS_RECORD_NUM_ELEM; ++i) { + TARRAY2_CLEAR(&statisBlock->dataArr[i], NULL); + } + return 0; +} + +int32_t tStatisBlockPut(STbStatisBlock *statisBlock, const STbStatisRecord *record) { + int32_t code; + for (int32_t i = 0; i < STATIS_RECORD_NUM_ELEM; ++i) { + code = TARRAY2_APPEND(&statisBlock->dataArr[i], record->dataArr[i]); + if (code) return code; + } + return 0; +} + +int32_t tStatisBlockGet(STbStatisBlock *statisBlock, int32_t idx, STbStatisRecord *record) { + if (idx >= STATIS_BLOCK_SIZE(statisBlock)) return TSDB_CODE_OUT_OF_RANGE; + for (int32_t i = 0; i < STATIS_RECORD_NUM_ELEM; ++i) { + record->dataArr[i] = TARRAY2_GET(&statisBlock->dataArr[i], idx); + } + return 0; +} + +// SBrinRecord ---------- +int32_t tBrinBlockInit(SBrinBlock *brinBlock) { + for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr1); ++i) { + TARRAY2_INIT(&brinBlock->dataArr1[i]); + } + for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr2); ++i) { + TARRAY2_INIT(&brinBlock->dataArr2[i]); + } + return 0; +} + +int32_t tBrinBlockDestroy(SBrinBlock *brinBlock) { + for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr1); ++i) { + TARRAY2_DESTROY(&brinBlock->dataArr1[i], NULL); + } + for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr2); ++i) { + TARRAY2_DESTROY(&brinBlock->dataArr2[i], NULL); + } + return 0; +} + +int32_t tBrinBlockClear(SBrinBlock *brinBlock) { + for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr1); ++i) { + TARRAY2_CLEAR(&brinBlock->dataArr1[i], NULL); + } + for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr2); ++i) { + TARRAY2_CLEAR(&brinBlock->dataArr2[i], NULL); + } + return 0; +} + +int32_t tBrinBlockPut(SBrinBlock *brinBlock, const SBrinRecord *record) { + int32_t code; + for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr1); ++i) { + code = TARRAY2_APPEND(&brinBlock->dataArr1[i], record->dataArr1[i]); + if (code) return code; + } + for (int32_t i = 0; i < 
ARRAY_SIZE(brinBlock->dataArr2); ++i) { + code = TARRAY2_APPEND(&brinBlock->dataArr2[i], record->dataArr2[i]); + if (code) return code; + } + return 0; +} + +int32_t tBrinBlockGet(SBrinBlock *brinBlock, int32_t idx, SBrinRecord *record) { + if (idx >= BRIN_BLOCK_SIZE(brinBlock)) return TSDB_CODE_OUT_OF_RANGE; + for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr1); ++i) { + record->dataArr1[i] = TARRAY2_GET(&brinBlock->dataArr1[i], idx); + } + for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr2); ++i) { + record->dataArr2[i] = TARRAY2_GET(&brinBlock->dataArr2[i], idx); + } + return 0; +} + +// other apis ---------- +int32_t tsdbUpdateSkmTb(STsdb *pTsdb, const TABLEID *tbid, SSkmInfo *pSkmTb) { + if (tbid->suid) { + if (pSkmTb->suid == tbid->suid) { + pSkmTb->uid = tbid->uid; + return 0; + } + } else if (pSkmTb->uid == tbid->uid) { + return 0; + } + + pSkmTb->suid = tbid->suid; + pSkmTb->uid = tbid->uid; + tDestroyTSchema(pSkmTb->pTSchema); + return metaGetTbTSchemaEx(pTsdb->pVnode->pMeta, tbid->suid, tbid->uid, -1, &pSkmTb->pTSchema); +} + +int32_t tsdbUpdateSkmRow(STsdb *pTsdb, const TABLEID *tbid, int32_t sver, SSkmInfo *pSkmRow) { + if (pSkmRow->pTSchema && pSkmRow->suid == tbid->suid) { + if (pSkmRow->suid) { + if (sver == pSkmRow->pTSchema->version) return 0; + } else if (pSkmRow->uid == tbid->uid && pSkmRow->pTSchema->version == sver) { + return 0; + } + } + + pSkmRow->suid = tbid->suid; + pSkmRow->uid = tbid->uid; + tDestroyTSchema(pSkmRow->pTSchema); + return metaGetTbTSchemaEx(pTsdb->pVnode->pMeta, tbid->suid, tbid->uid, sver, &pSkmRow->pTSchema); +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbUtil2.h b/source/dnode/vnode/src/tsdb/tsdbUtil2.h new file mode 100644 index 0000000000000000000000000000000000000000..fa0636834155df5d8f8ddfbb3988d6a5a7d86bbf --- /dev/null +++ b/source/dnode/vnode/src/tsdb/tsdbUtil2.h @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. 
+ * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TSDB_UTIL_H +#define _TSDB_UTIL_H + +#include "tsdbDef.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// STombRecord ---------- +#define TOMB_RECORD_ELEM_NUM 5 +typedef union { + int64_t dataArr[TOMB_RECORD_ELEM_NUM]; + struct { + int64_t suid; + int64_t uid; + int64_t version; + int64_t skey; + int64_t ekey; + }; +} STombRecord; + +typedef union { + TARRAY2(int64_t) dataArr[TOMB_RECORD_ELEM_NUM]; + struct { + TARRAY2(int64_t) suid[1]; + TARRAY2(int64_t) uid[1]; + TARRAY2(int64_t) version[1]; + TARRAY2(int64_t) skey[1]; + TARRAY2(int64_t) ekey[1]; + }; +} STombBlock; + +typedef struct { + SFDataPtr dp[1]; + TABLEID minTbid; + TABLEID maxTbid; + int64_t minVer; + int64_t maxVer; + int32_t numRec; + int32_t size[TOMB_RECORD_ELEM_NUM]; + int8_t cmprAlg; + int8_t rsvd[7]; +} STombBlk; + +typedef TARRAY2(STombBlk) TTombBlkArray; + +#define TOMB_BLOCK_SIZE(db) TARRAY2_SIZE((db)->suid) + +int32_t tTombBlockInit(STombBlock *tombBlock); +int32_t tTombBlockDestroy(STombBlock *tombBlock); +int32_t tTombBlockClear(STombBlock *tombBlock); +int32_t tTombBlockPut(STombBlock *tombBlock, const STombRecord *record); +int32_t tTombBlockGet(STombBlock *tombBlock, int32_t idx, STombRecord *record); +int32_t tTombRecordCompare(const STombRecord *record1, const STombRecord *record2); + +// STbStatisRecord ---------- +#define STATIS_RECORD_NUM_ELEM 5 +typedef union { + int64_t dataArr[STATIS_RECORD_NUM_ELEM]; + struct { + int64_t suid; 
+ int64_t uid; + int64_t firstKey; + int64_t lastKey; + int64_t count; + }; +} STbStatisRecord; + +typedef union { + TARRAY2(int64_t) dataArr[STATIS_RECORD_NUM_ELEM]; + struct { + TARRAY2(int64_t) suid[1]; + TARRAY2(int64_t) uid[1]; + TARRAY2(int64_t) firstKey[1]; + TARRAY2(int64_t) lastKey[1]; + TARRAY2(int64_t) count[1]; + }; +} STbStatisBlock; + +typedef struct { + SFDataPtr dp[1]; + TABLEID minTbid; + TABLEID maxTbid; + int32_t numRec; + int32_t size[STATIS_RECORD_NUM_ELEM]; + int8_t cmprAlg; + int8_t rsvd[7]; +} SStatisBlk; + +#define STATIS_BLOCK_SIZE(db) TARRAY2_SIZE((db)->suid) + +int32_t tStatisBlockInit(STbStatisBlock *statisBlock); +int32_t tStatisBlockDestroy(STbStatisBlock *statisBlock); +int32_t tStatisBlockClear(STbStatisBlock *statisBlock); +int32_t tStatisBlockPut(STbStatisBlock *statisBlock, const STbStatisRecord *record); +int32_t tStatisBlockGet(STbStatisBlock *statisBlock, int32_t idx, STbStatisRecord *record); + +// SBrinRecord ---------- +typedef union { + struct { + int64_t dataArr1[10]; + int32_t dataArr2[5]; + }; + struct { + int64_t suid; + int64_t uid; + int64_t firstKey; + int64_t firstKeyVer; + int64_t lastKey; + int64_t lastKeyVer; + int64_t minVer; + int64_t maxVer; + int64_t blockOffset; + int64_t smaOffset; + int32_t blockSize; + int32_t blockKeySize; + int32_t smaSize; + int32_t numRow; + int32_t count; + }; +} SBrinRecord; + +typedef union { + struct { + TARRAY2(int64_t) dataArr1[10]; + TARRAY2(int32_t) dataArr2[5]; + }; + struct { + TARRAY2(int64_t) suid[1]; + TARRAY2(int64_t) uid[1]; + TARRAY2(int64_t) firstKey[1]; + TARRAY2(int64_t) firstKeyVer[1]; + TARRAY2(int64_t) lastKey[1]; + TARRAY2(int64_t) lastKeyVer[1]; + TARRAY2(int64_t) minVer[1]; + TARRAY2(int64_t) maxVer[1]; + TARRAY2(int64_t) blockOffset[1]; + TARRAY2(int64_t) smaOffset[1]; + TARRAY2(int32_t) blockSize[1]; + TARRAY2(int32_t) blockKeySize[1]; + TARRAY2(int32_t) smaSize[1]; + TARRAY2(int32_t) numRow[1]; + TARRAY2(int32_t) count[1]; + }; +} SBrinBlock; + +typedef 
struct { + SFDataPtr dp[1]; + TABLEID minTbid; + TABLEID maxTbid; + int64_t minVer; + int64_t maxVer; + int32_t numRec; + int32_t size[15]; + int8_t cmprAlg; + int8_t rsvd[7]; +} SBrinBlk; + +typedef TARRAY2(SBrinBlk) TBrinBlkArray; + +#define BRIN_BLOCK_SIZE(db) TARRAY2_SIZE((db)->suid) + +int32_t tBrinBlockInit(SBrinBlock *brinBlock); +int32_t tBrinBlockDestroy(SBrinBlock *brinBlock); +int32_t tBrinBlockClear(SBrinBlock *brinBlock); +int32_t tBrinBlockPut(SBrinBlock *brinBlock, const SBrinRecord *record); +int32_t tBrinBlockGet(SBrinBlock *brinBlock, int32_t idx, SBrinRecord *record); + +// other apis +int32_t tsdbUpdateSkmTb(STsdb *pTsdb, const TABLEID *tbid, SSkmInfo *pSkmTb); +int32_t tsdbUpdateSkmRow(STsdb *pTsdb, const TABLEID *tbid, int32_t sver, SSkmInfo *pSkmRow); + +#ifdef __cplusplus +} +#endif + +#endif /*_TSDB_UTIL_H*/ \ No newline at end of file diff --git a/source/dnode/vnode/src/vnd/vnodeCfg.c b/source/dnode/vnode/src/vnd/vnodeCfg.c index 3dc1cd800a14ad684964f60ff7b98e0bf4c6aa19..d88d8820ee458bc2782e42a79f63230f59e4557a 100644 --- a/source/dnode/vnode/src/vnd/vnodeCfg.c +++ b/source/dnode/vnode/src/vnd/vnodeCfg.c @@ -49,7 +49,7 @@ const SVnodeCfg vnodeCfgDefault = {.vgId = -1, .hashBegin = 0, .hashEnd = 0, .hashMethod = 0, - .sttTrigger = TSDB_DEFAULT_STT_FILE, + .sttTrigger = TSDB_DEFAULT_SST_TRIGGER, .tsdbPageSize = TSDB_DEFAULT_PAGE_SIZE}; int vnodeCheckCfg(const SVnodeCfg *pCfg) { @@ -57,7 +57,7 @@ int vnodeCheckCfg(const SVnodeCfg *pCfg) { return 0; } -const char* vnodeRoleToStr(ESyncRole role) { +const char *vnodeRoleToStr(ESyncRole role) { switch (role) { case TAOS_SYNC_ROLE_VOTER: return "true"; @@ -68,11 +68,11 @@ const char* vnodeRoleToStr(ESyncRole role) { } } -const ESyncRole vnodeStrToRole(char* str) { - if(strcmp(str, "true") == 0){ +const ESyncRole vnodeStrToRole(char *str) { + if (strcmp(str, "true") == 0) { return TAOS_SYNC_ROLE_VOTER; } - if(strcmp(str, "false") == 0){ + if (strcmp(str, "false") == 0) { return 
TAOS_SYNC_ROLE_LEARNER; } @@ -299,10 +299,9 @@ int vnodeDecodeConfig(const SJson *pJson, void *pObj) { char role[10] = {0}; code = tjsonGetStringValue(info, "isReplica", role); if (code < 0) return -1; - if(strlen(role) != 0){ + if (strlen(role) != 0) { pNode->nodeRole = vnodeStrToRole(role); - } - else{ + } else { pNode->nodeRole = TAOS_SYNC_ROLE_VOTER; } vDebug("vgId:%d, decode config, replica:%d ep:%s:%u dnode:%d", pCfg->vgId, i, pNode->nodeFqdn, pNode->nodePort, diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index f79da37b70538299217e6a9c3a9fb874d57ab9e3..fd701d7b959fa26f06d2c9dc2c480f7e6b0711a4 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -16,6 +16,13 @@ #include "vnd.h" #include "vnodeInt.h" +extern int32_t tsdbPreCommit(STsdb *pTsdb); +extern int32_t tsdbCommitBegin(STsdb *pTsdb, SCommitInfo *pInfo); +extern int32_t tsdbCommitCommit(STsdb *pTsdb); +extern int32_t tsdbCommitAbort(STsdb *pTsdb); + +#define VND_INFO_FNAME_TMP "vnode_tmp.json" + static int vnodeEncodeInfo(const SVnodeInfo *pInfo, char **ppData); static int vnodeCommitImpl(SCommitInfo *pInfo); @@ -298,7 +305,7 @@ static int32_t vnodePrepareCommit(SVnode *pVnode, SCommitInfo *pInfo) { TSDB_CHECK_CODE(code, lino, _exit); } - tsdbPrepareCommit(pVnode->pTsdb); + tsdbPreCommit(pVnode->pTsdb); metaPrepareAsyncCommit(pVnode->pMeta); @@ -432,8 +439,7 @@ static int vnodeCommitImpl(SCommitInfo *pInfo) { syncBeginSnapshot(pVnode->sync, pInfo->info.state.committed); - // commit each sub-system - code = tsdbCommit(pVnode->pTsdb, pInfo); + code = tsdbCommitBegin(pVnode->pTsdb, pInfo); TSDB_CHECK_CODE(code, lino, _exit); if (!TSDB_CACHE_NO(pVnode->config)) { @@ -457,7 +463,7 @@ static int vnodeCommitImpl(SCommitInfo *pInfo) { TSDB_CHECK_CODE(code, lino, _exit); } - code = tsdbFinishCommit(pVnode->pTsdb); + code = tsdbCommitCommit(pVnode->pTsdb); TSDB_CHECK_CODE(code, lino, _exit); if 
(VND_IS_RSMA(pVnode)) { diff --git a/source/dnode/vnode/src/vnd/vnodeInitApi.c b/source/dnode/vnode/src/vnd/vnodeInitApi.c index 28a88561af0aea5aa2ad236d0d0d71ff6da43093..5c8d563d73f2d98567b1118b676eb607c5937bf9 100644 --- a/source/dnode/vnode/src/vnd/vnodeInitApi.c +++ b/source/dnode/vnode/src/vnd/vnodeInitApi.c @@ -42,24 +42,24 @@ void initStorageAPI(SStorageAPI* pAPI) { void initTsdbReaderAPI(TsdReader* pReader) { pReader->tsdReaderOpen = (int32_t(*)(void*, SQueryTableDataCond*, void*, int32_t, SSDataBlock*, void**, const char*, - bool, SHashObj**))tsdbReaderOpen; - pReader->tsdReaderClose = tsdbReaderClose; + bool, SHashObj**))tsdbReaderOpen2; + pReader->tsdReaderClose = tsdbReaderClose2; - pReader->tsdNextDataBlock = tsdbNextDataBlock; + pReader->tsdNextDataBlock = tsdbNextDataBlock2; - pReader->tsdReaderRetrieveDataBlock = tsdbRetrieveDataBlock; - pReader->tsdReaderReleaseDataBlock = tsdbReleaseDataBlock; + pReader->tsdReaderRetrieveDataBlock = tsdbRetrieveDataBlock2; + pReader->tsdReaderReleaseDataBlock = tsdbReleaseDataBlock2; - pReader->tsdReaderRetrieveBlockSMAInfo = tsdbRetrieveDatablockSMA; + pReader->tsdReaderRetrieveBlockSMAInfo = tsdbRetrieveDatablockSMA2; pReader->tsdReaderNotifyClosing = tsdbReaderSetCloseFlag; - pReader->tsdReaderResetStatus = tsdbReaderReset; + pReader->tsdReaderResetStatus = tsdbReaderReset2; - pReader->tsdReaderGetDataBlockDistInfo = tsdbGetFileBlocksDistInfo; - pReader->tsdReaderGetNumOfInMemRows = tsdbGetNumOfRowsInMemTable; // todo this function should be moved away + pReader->tsdReaderGetDataBlockDistInfo = tsdbGetFileBlocksDistInfo2; + pReader->tsdReaderGetNumOfInMemRows = tsdbGetNumOfRowsInMemTable2; // todo this function should be moved away - pReader->tsdSetQueryTableList = tsdbSetTableList; - pReader->tsdSetReaderTaskId = (void (*)(void*, const char*))tsdbReaderSetId; + pReader->tsdSetQueryTableList = tsdbSetTableList2; + pReader->tsdSetReaderTaskId = (void (*)(void*, const char*))tsdbReaderSetId2; } void 
initMetadataAPI(SStoreMeta* pMeta) { @@ -180,6 +180,8 @@ void initStateStoreAPI(SStateStore* pStore) { pStore->updateInfoIsUpdated = updateInfoIsUpdated; pStore->updateInfoIsTableInserted = updateInfoIsTableInserted; pStore->updateInfoDestroy = updateInfoDestroy; + pStore->windowSBfDelete = windowSBfDelete; + pStore->windowSBfAdd = windowSBfAdd; pStore->updateInfoInitP = updateInfoInitP; pStore->updateInfoAddCloseWindowSBF = updateInfoAddCloseWindowSBF; @@ -238,7 +240,7 @@ void initCacheFn(SStoreCacheReader* pCache) { } void initSnapshotFn(SStoreSnapshotFn* pSnapshot) { - pSnapshot->createSnapshot = setForSnapShot; + pSnapshot->setForSnapShot = setForSnapShot; pSnapshot->destroySnapshot = destroySnapContext; pSnapshot->getMetaTableInfoFromSnapshot = getMetaTableInfoFromSnapshot; pSnapshot->getTableInfoFromSnapshot = getTableInfoFromSnapshot; diff --git a/source/dnode/vnode/src/vnd/vnodeModule.c b/source/dnode/vnode/src/vnd/vnodeModule.c index 782ffd788d27d338743447fb67954a9dd7812ba7..74a8d14a86c6340c3acedec831524b8ae765b91e 100644 --- a/source/dnode/vnode/src/vnd/vnodeModule.c +++ b/source/dnode/vnode/src/vnd/vnodeModule.c @@ -23,26 +23,24 @@ struct SVnodeTask { void* arg; }; -struct SVnodeGlobal { - int8_t init; - int8_t stop; +typedef struct { int nthreads; TdThread* threads; TdThreadMutex mutex; TdThreadCond hasTask; SVnodeTask queue; +} SVnodeThreadPool; + +struct SVnodeGlobal { + int8_t init; + int8_t stop; + SVnodeThreadPool tp[2]; }; struct SVnodeGlobal vnodeGlobal; static void* loop(void* arg); -static tsem_t canCommit = {0}; - -static void vnodeInitCommit() { tsem_init(&canCommit, 0, 4); }; -void vnode_wait_commit() { tsem_wait(&canCommit); } -void vnode_done_commit() { tsem_wait(&canCommit); } - int vnodeInit(int nthreads) { int8_t init; int ret; @@ -51,28 +49,30 @@ int vnodeInit(int nthreads) { if (init) { return 0; } + vnodeGlobal.stop = 0; - taosThreadMutexInit(&vnodeGlobal.mutex, NULL); - taosThreadCondInit(&vnodeGlobal.hasTask, NULL); + for (int32_t 
i = 0; i < ARRAY_SIZE(vnodeGlobal.tp); i++) { + taosThreadMutexInit(&vnodeGlobal.tp[i].mutex, NULL); + taosThreadCondInit(&vnodeGlobal.tp[i].hasTask, NULL); - taosThreadMutexLock(&vnodeGlobal.mutex); + taosThreadMutexLock(&vnodeGlobal.tp[i].mutex); - vnodeGlobal.stop = 0; - vnodeGlobal.queue.next = &vnodeGlobal.queue; - vnodeGlobal.queue.prev = &vnodeGlobal.queue; + vnodeGlobal.tp[i].queue.next = &vnodeGlobal.tp[i].queue; + vnodeGlobal.tp[i].queue.prev = &vnodeGlobal.tp[i].queue; - taosThreadMutexUnlock(&(vnodeGlobal.mutex)); + taosThreadMutexUnlock(&(vnodeGlobal.tp[i].mutex)); - vnodeGlobal.nthreads = nthreads; - vnodeGlobal.threads = taosMemoryCalloc(nthreads, sizeof(TdThread)); - if (vnodeGlobal.threads == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - vError("failed to init vnode module since:%s", tstrerror(terrno)); - return -1; - } + vnodeGlobal.tp[i].nthreads = nthreads; + vnodeGlobal.tp[i].threads = taosMemoryCalloc(nthreads, sizeof(TdThread)); + if (vnodeGlobal.tp[i].threads == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + vError("failed to init vnode module since:%s", tstrerror(terrno)); + return -1; + } - for (int i = 0; i < nthreads; i++) { - taosThreadCreate(&(vnodeGlobal.threads[i]), NULL, loop, NULL); + for (int j = 0; j < nthreads; j++) { + taosThreadCreate(&(vnodeGlobal.tp[i].threads[j]), NULL, loop, &vnodeGlobal.tp[i]); + } } if (walInit() < 0) { @@ -92,27 +92,29 @@ void vnodeCleanup() { if (init == 0) return; // set stop - taosThreadMutexLock(&(vnodeGlobal.mutex)); vnodeGlobal.stop = 1; - taosThreadCondBroadcast(&(vnodeGlobal.hasTask)); - taosThreadMutexUnlock(&(vnodeGlobal.mutex)); + for (int32_t i = 0; i < ARRAY_SIZE(vnodeGlobal.tp); i++) { + taosThreadMutexLock(&(vnodeGlobal.tp[i].mutex)); + taosThreadCondBroadcast(&(vnodeGlobal.tp[i].hasTask)); + taosThreadMutexUnlock(&(vnodeGlobal.tp[i].mutex)); + + // wait for threads + for (int j = 0; j < vnodeGlobal.tp[i].nthreads; j++) { + taosThreadJoin(vnodeGlobal.tp[i].threads[j], NULL); + } - // wait 
for threads - for (int i = 0; i < vnodeGlobal.nthreads; i++) { - taosThreadJoin(vnodeGlobal.threads[i], NULL); + // clear source + taosMemoryFreeClear(vnodeGlobal.tp[i].threads); + taosThreadCondDestroy(&(vnodeGlobal.tp[i].hasTask)); + taosThreadMutexDestroy(&(vnodeGlobal.tp[i].mutex)); } - // clear source - taosMemoryFreeClear(vnodeGlobal.threads); - taosThreadCondDestroy(&(vnodeGlobal.hasTask)); - taosThreadMutexDestroy(&(vnodeGlobal.mutex)); - walCleanUp(); tqCleanUp(); smaCleanUp(); } -int vnodeScheduleTask(int (*execute)(void*), void* arg) { +int vnodeScheduleTaskEx(int tpid, int (*execute)(void*), void* arg) { SVnodeTask* pTask; ASSERT(!vnodeGlobal.stop); @@ -126,35 +128,42 @@ int vnodeScheduleTask(int (*execute)(void*), void* arg) { pTask->execute = execute; pTask->arg = arg; - taosThreadMutexLock(&(vnodeGlobal.mutex)); - pTask->next = &vnodeGlobal.queue; - pTask->prev = vnodeGlobal.queue.prev; - vnodeGlobal.queue.prev->next = pTask; - vnodeGlobal.queue.prev = pTask; - taosThreadCondSignal(&(vnodeGlobal.hasTask)); - taosThreadMutexUnlock(&(vnodeGlobal.mutex)); + taosThreadMutexLock(&(vnodeGlobal.tp[tpid].mutex)); + pTask->next = &vnodeGlobal.tp[tpid].queue; + pTask->prev = vnodeGlobal.tp[tpid].queue.prev; + vnodeGlobal.tp[tpid].queue.prev->next = pTask; + vnodeGlobal.tp[tpid].queue.prev = pTask; + taosThreadCondSignal(&(vnodeGlobal.tp[tpid].hasTask)); + taosThreadMutexUnlock(&(vnodeGlobal.tp[tpid].mutex)); return 0; } +int vnodeScheduleTask(int (*execute)(void*), void* arg) { return vnodeScheduleTaskEx(0, execute, arg); } + /* ------------------------ STATIC METHODS ------------------------ */ static void* loop(void* arg) { - SVnodeTask* pTask; - int ret; - - setThreadName("vnode-commit"); + SVnodeThreadPool* tp = (SVnodeThreadPool*)arg; + SVnodeTask* pTask; + int ret; + + if (tp == &vnodeGlobal.tp[0]) { + setThreadName("vnode-commit"); + } else if (tp == &vnodeGlobal.tp[1]) { + setThreadName("vnode-merge"); + } for (;;) { - 
taosThreadMutexLock(&(vnodeGlobal.mutex)); + taosThreadMutexLock(&(tp->mutex)); for (;;) { - pTask = vnodeGlobal.queue.next; - if (pTask == &vnodeGlobal.queue) { + pTask = tp->queue.next; + if (pTask == &tp->queue) { // no task if (vnodeGlobal.stop) { - taosThreadMutexUnlock(&(vnodeGlobal.mutex)); + taosThreadMutexUnlock(&(tp->mutex)); return NULL; } else { - taosThreadCondWait(&(vnodeGlobal.hasTask), &(vnodeGlobal.mutex)); + taosThreadCondWait(&(tp->hasTask), &(tp->mutex)); } } else { // has task @@ -164,7 +173,7 @@ static void* loop(void* arg) { } } - taosThreadMutexUnlock(&(vnodeGlobal.mutex)); + taosThreadMutexUnlock(&(tp->mutex)); pTask->execute(pTask->arg); taosMemoryFree(pTask); diff --git a/source/dnode/vnode/src/vnd/vnodeOpen.c b/source/dnode/vnode/src/vnd/vnodeOpen.c index db3b35867d3c72168f6cd06d9bd7a8108eb80552..dfd9f288efbc1d638100eb34d594497d8632de33 100644 --- a/source/dnode/vnode/src/vnd/vnodeOpen.c +++ b/source/dnode/vnode/src/vnd/vnodeOpen.c @@ -479,8 +479,8 @@ void vnodeClose(SVnode *pVnode) { tsem_wait(&pVnode->canCommit); vnodeSyncClose(pVnode); vnodeQueryClose(pVnode); - walClose(pVnode->pWal); tqClose(pVnode->pTq); + walClose(pVnode->pWal); if (pVnode->pTsdb) tsdbClose(&pVnode->pTsdb); smaClose(pVnode->pSma); if (pVnode->pMeta) metaClose(&pVnode->pMeta); diff --git a/source/dnode/vnode/src/vnd/vnodeRetention.c b/source/dnode/vnode/src/vnd/vnodeRetention.c index f582d5e4a4eaf7b995a625d55e3fc73797d44bfb..f3344d1d7d0f82cca3872a27d50b5a6ad2680e98 100644 --- a/source/dnode/vnode/src/vnd/vnodeRetention.c +++ b/source/dnode/vnode/src/vnd/vnodeRetention.c @@ -15,111 +15,8 @@ #include "vnd.h" -typedef struct { - SVnode *pVnode; - int64_t now; - int64_t commitID; - SVnodeInfo info; -} SRetentionInfo; +extern int32_t tsdbRetention(STsdb *tsdb, int64_t now, int32_t sync); -extern bool tsdbShouldDoRetention(STsdb *pTsdb, int64_t now); -extern int32_t tsdbDoRetention(STsdb *pTsdb, int64_t now); -extern int32_t tsdbCommitRetention(STsdb *pTsdb); - -static 
int32_t vnodePrepareRentention(SVnode *pVnode, SRetentionInfo *pInfo) { - int32_t code = 0; - int32_t lino = 0; - - tsem_wait(&pVnode->canCommit); - - pInfo->commitID = ++pVnode->state.commitID; - - char dir[TSDB_FILENAME_LEN] = {0}; - vnodeGetPrimaryDir(pVnode->path, pVnode->diskPrimary, pVnode->pTfs, dir, TSDB_FILENAME_LEN); - - if (vnodeLoadInfo(dir, &pInfo->info) < 0) { - code = terrno; - TSDB_CHECK_CODE(code, lino, _exit); - } - -_exit: - if (code) { - vError("vgId:%d %s failed at line %d since %s", TD_VID(pVnode), __func__, lino, tstrerror(code)); - tsem_post(&pVnode->canCommit); - } else { - vInfo("vgId:%d %s done", TD_VID(pVnode), __func__); - } - return code; -} - -static int32_t vnodeRetentionTask(void *param) { - int32_t code = 0; - int32_t lino = 0; - - SRetentionInfo *pInfo = (SRetentionInfo *)param; - SVnode *pVnode = pInfo->pVnode; - char dir[TSDB_FILENAME_LEN] = {0}; - - vnodeGetPrimaryDir(pVnode->path, pVnode->diskPrimary, pVnode->pTfs, dir, TSDB_FILENAME_LEN); - - // save info - pInfo->info.state.commitID = pInfo->commitID; - - if (vnodeSaveInfo(dir, &pInfo->info) < 0) { - code = terrno; - TSDB_CHECK_CODE(code, lino, _exit); - } - - // do job - code = tsdbDoRetention(pInfo->pVnode->pTsdb, pInfo->now); - TSDB_CHECK_CODE(code, lino, _exit); - - code = smaDoRetention(pInfo->pVnode->pSma, pInfo->now); - TSDB_CHECK_CODE(code, lino, _exit); - - // commit info - vnodeCommitInfo(dir); - - // commit sub-job - tsdbCommitRetention(pVnode->pTsdb); - -_exit: - if (code) { - vError("vgId:%d %s failed at line %d since %s", TD_VID(pInfo->pVnode), __func__, lino, tstrerror(code)); - } else { - vInfo("vgId:%d %s done", TD_VID(pInfo->pVnode), __func__); - } - tsem_post(&pInfo->pVnode->canCommit); - taosMemoryFree(pInfo); - return code; -} - -int32_t vnodeAsyncRentention(SVnode *pVnode, int64_t now) { - int32_t code = 0; - int32_t lino = 0; - - if (!tsdbShouldDoRetention(pVnode->pTsdb, now)) return code; - - SRetentionInfo *pInfo = (SRetentionInfo 
*)taosMemoryCalloc(1, sizeof(*pInfo)); - if (pInfo == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - TSDB_CHECK_CODE(code, lino, _exit); - } - - pInfo->pVnode = pVnode; - pInfo->now = now; - - code = vnodePrepareRentention(pVnode, pInfo); - TSDB_CHECK_CODE(code, lino, _exit); - - vnodeScheduleTask(vnodeRetentionTask, pInfo); - -_exit: - if (code) { - vError("vgId:%d %s failed at line %d since %s", TD_VID(pVnode), __func__, lino, tstrerror(code)); - if (pInfo) taosMemoryFree(pInfo); - } else { - vInfo("vgId:%d %s done", TD_VID(pInfo->pVnode), __func__); - } - return 0; -} +int32_t vnodeDoRetention(SVnode *pVnode, int64_t now) { + return tsdbRetention(pVnode->pTsdb, now, pVnode->config.sttTrigger == 1); +} \ No newline at end of file diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index f39f813d6aadac2b4beda1bde49faf7021b4db3f..9d6ba2ad0262b74bb35f758ec1d234b72201d2d2 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -462,12 +462,7 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg } break; case TDMT_VND_TMQ_COMMIT_OFFSET: - if (tqProcessOffsetCommitReq(pVnode->pTq, ver, pReq, pMsg->contLen - sizeof(SMsgHead)) < 0) { - goto _err; - } - break; - case TDMT_VND_TMQ_SEEK_TO_OFFSET: - if (tqProcessSeekReq(pVnode->pTq, ver, pReq, pMsg->contLen - sizeof(SMsgHead)) < 0) { + if (tqProcessOffsetCommitReq(pVnode->pTq, ver, pReq, len) < 0) { goto _err; } break; @@ -636,6 +631,11 @@ int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) { // return tqProcessPollReq(pVnode->pTq, pMsg); case TDMT_VND_TMQ_VG_WALINFO: return tqProcessVgWalInfoReq(pVnode->pTq, pMsg); + case TDMT_VND_TMQ_VG_COMMITTEDINFO: + return tqProcessVgCommittedInfoReq(pVnode->pTq, pMsg); + case TDMT_VND_TMQ_SEEK: + return tqProcessSeekReq(pVnode->pTq, pMsg); + default: vError("unknown msg type:%d in fetch queue", pMsg->msgType); return TSDB_CODE_APP_ERROR; @@ -671,9 
+671,9 @@ int32_t vnodeProcessStreamMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) case TDMT_STREAM_TRANSFER_STATE: return tqProcessTaskTransferStateReq(pVnode->pTq, pMsg); case TDMT_STREAM_SCAN_HISTORY_FINISH: - return tqProcessStreamTaskScanHistoryFinishReq(pVnode->pTq, pMsg); + return tqProcessTaskScanHistoryFinishReq(pVnode->pTq, pMsg); case TDMT_STREAM_SCAN_HISTORY_FINISH_RSP: - return tqProcessTaskRecoverFinishRsp(pVnode->pTq, pMsg); + return tqProcessTaskScanHistoryFinishRsp(pVnode->pTq, pMsg); default: vError("unknown msg type:%d in stream queue", pMsg->msgType); return TSDB_CODE_APP_ERROR; @@ -696,7 +696,8 @@ void vnodeUpdateMetaRsp(SVnode *pVnode, STableMetaRsp *pMetaRsp) { pMetaRsp->precision = pVnode->config.tsdbCfg.precision; } -extern int32_t vnodeAsyncRentention(SVnode *pVnode, int64_t now); +extern int32_t vnodeDoRetention(SVnode *pVnode, int64_t now); + static int32_t vnodeProcessTrimReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp) { int32_t code = 0; SVTrimDbReq trimReq = {0}; @@ -709,10 +710,7 @@ static int32_t vnodeProcessTrimReq(SVnode *pVnode, int64_t ver, void *pReq, int3 vInfo("vgId:%d, trim vnode request will be processed, time:%d", pVnode->config.vgId, trimReq.timestamp); - // process - vnodeAsyncRentention(pVnode, trimReq.timestamp); - tsem_wait(&pVnode->canCommit); - tsem_post(&pVnode->canCommit); + code = vnodeDoRetention(pVnode, trimReq.timestamp); _exit: return code; @@ -737,7 +735,7 @@ static int32_t vnodeProcessDropTtlTbReq(SVnode *pVnode, int64_t ver, void *pReq, tqUpdateTbUidList(pVnode->pTq, tbUids, false); } - vnodeAsyncRentention(pVnode, ttlReq.timestampSec); + vnodeDoRetention(pVnode, ttlReq.timestampSec); end: taosArrayDestroy(tbUids); diff --git a/source/libs/catalog/inc/catalogInt.h b/source/libs/catalog/inc/catalogInt.h index 5746ea23406a722ec7464a9db07786c5cc5ee410..7d47e82164fdbd5b77a5eb70637957ae048ef7bb 100644 --- a/source/libs/catalog/inc/catalogInt.h +++ 
b/source/libs/catalog/inc/catalogInt.h @@ -938,7 +938,7 @@ int32_t ctgInitJob(SCatalog* pCtg, SRequestConnInfo* pConn, SCtgJob** job, const void* param); int32_t ctgLaunchJob(SCtgJob* pJob); int32_t ctgMakeAsyncRes(SCtgJob* pJob); -int32_t ctgLaunchSubTask(SCtgTask* pTask, CTG_TASK_TYPE type, ctgSubTaskCbFp fp, void* param); +int32_t ctgLaunchSubTask(SCtgTask** ppTask, CTG_TASK_TYPE type, ctgSubTaskCbFp fp, void* param); int32_t ctgGetTbCfgCb(SCtgTask* pTask); void ctgFreeHandle(SCatalog* pCatalog); diff --git a/source/libs/catalog/src/ctgAsync.c b/source/libs/catalog/src/ctgAsync.c index 562343c9c70b4b8a38c866069e897535c5fdfddd..fb5ecf7ad219b029c6d2d256192dc4b8f5d983b4 100644 --- a/source/libs/catalog/src/ctgAsync.c +++ b/source/libs/catalog/src/ctgAsync.c @@ -2090,25 +2090,25 @@ int32_t ctgLaunchGetTbCfgTask(SCtgTask* pTask) { } CTG_CACHE_NHIT_INC(CTG_CI_TBL_CFG, 1); - + if (pCtx->tbType <= 0) { CTG_ERR_JRET(ctgReadTbTypeFromCache(pCtg, dbFName, pCtx->pName->tname, &pCtx->tbType)); if (pCtx->tbType <= 0) { SCtgTbMetaParam param; param.pName = pCtx->pName; param.flag = 0; - CTG_ERR_JRET(ctgLaunchSubTask(pTask, CTG_TASK_GET_TB_META, ctgGetTbCfgCb, ¶m)); + CTG_ERR_JRET(ctgLaunchSubTask(&pTask, CTG_TASK_GET_TB_META, ctgGetTbCfgCb, ¶m)); return TSDB_CODE_SUCCESS; } } - if (TSDB_SUPER_TABLE == pCtx->tbType) { + if (TSDB_SUPER_TABLE == pCtx->tbType || TSDB_SYSTEM_TABLE == pCtx->tbType) { CTG_ERR_JRET(ctgGetTableCfgFromMnode(pCtg, pConn, pCtx->pName, NULL, pTask)); } else { if (NULL == pCtx->pVgInfo) { CTG_ERR_JRET(ctgGetTbHashVgroupFromCache(pCtg, pCtx->pName, &pCtx->pVgInfo)); if (NULL == pCtx->pVgInfo) { - CTG_ERR_JRET(ctgLaunchSubTask(pTask, CTG_TASK_GET_DB_VGROUP, ctgGetTbCfgCb, dbFName)); + CTG_ERR_JRET(ctgLaunchSubTask(&pTask, CTG_TASK_GET_DB_VGROUP, ctgGetTbCfgCb, dbFName)); return TSDB_CODE_SUCCESS; } } @@ -2145,7 +2145,7 @@ int32_t ctgLaunchGetTbTagTask(SCtgTask* pTask) { if (NULL == pCtx->pVgInfo) { CTG_ERR_JRET(ctgGetTbHashVgroupFromCache(pCtg, pCtx->pName, 
&pCtx->pVgInfo)); if (NULL == pCtx->pVgInfo) { - CTG_ERR_JRET(ctgLaunchSubTask(pTask, CTG_TASK_GET_DB_VGROUP, ctgGetTbTagCb, dbFName)); + CTG_ERR_JRET(ctgLaunchSubTask(&pTask, CTG_TASK_GET_DB_VGROUP, ctgGetTbTagCb, dbFName)); return TSDB_CODE_SUCCESS; } } @@ -2331,7 +2331,7 @@ int32_t ctgLaunchGetUserTask(SCtgTask* pTask) { SCtgTbMetaParam param; param.pName = &pCtx->user.tbName; param.flag = CTG_FLAG_SYNC_OP; - CTG_ERR_RET(ctgLaunchSubTask(pTask, CTG_TASK_GET_TB_META, ctgGetUserCb, ¶m)); + CTG_ERR_RET(ctgLaunchSubTask(&pTask, CTG_TASK_GET_TB_META, ctgGetUserCb, ¶m)); } else { CTG_ERR_RET(ctgGetUserDbAuthFromMnode(pCtg, pConn, pCtx->user.user, NULL, pTask)); } @@ -2541,19 +2541,35 @@ _return: CTG_RET(code); } -int32_t ctgLaunchSubTask(SCtgTask* pTask, CTG_TASK_TYPE type, ctgSubTaskCbFp fp, void* param) { - SCtgJob* pJob = pTask->pJob; +SCtgTask* ctgGetTask(SCtgJob* pJob, int32_t taskId) { + int32_t taskNum = taosArrayGetSize(pJob->pTasks); + + for (int32_t i = 0; i < taskNum; ++i) { + SCtgTask* pTask = taosArrayGet(pJob->pTasks, i); + if (pTask->taskId == taskId) { + return pTask; + } + } + + return NULL; +} + + +int32_t ctgLaunchSubTask(SCtgTask** ppTask, CTG_TASK_TYPE type, ctgSubTaskCbFp fp, void* param) { + SCtgJob* pJob = (*ppTask)->pJob; int32_t subTaskId = -1; bool newTask = false; + int32_t taskId = (*ppTask)->taskId; - ctgClearSubTaskRes(&pTask->subRes); - pTask->subRes.type = type; - pTask->subRes.fp = fp; + ctgClearSubTaskRes(&(*ppTask)->subRes); + (*ppTask)->subRes.type = type; + (*ppTask)->subRes.fp = fp; CTG_ERR_RET(ctgSearchExistingTask(pJob, type, param, &subTaskId)); if (subTaskId < 0) { CTG_ERR_RET(ctgInitTask(pJob, type, param, &subTaskId)); newTask = true; + *ppTask = ctgGetTask(pJob, taskId); } SCtgTask* pSub = taosArrayGet(pJob->pTasks, subTaskId); @@ -2561,10 +2577,10 @@ int32_t ctgLaunchSubTask(SCtgTask* pTask, CTG_TASK_TYPE type, ctgSubTaskCbFp fp, pSub->subTask = true; } - CTG_ERR_RET(ctgSetSubTaskCb(pSub, pTask)); + 
CTG_ERR_RET(ctgSetSubTaskCb(pSub, *ppTask)); if (newTask) { - SCtgMsgCtx* pMsgCtx = CTG_GET_TASK_MSGCTX(pTask, -1); + SCtgMsgCtx* pMsgCtx = CTG_GET_TASK_MSGCTX(*ppTask, -1); SCtgMsgCtx* pSubMsgCtx = CTG_GET_TASK_MSGCTX(pSub, -1); pSubMsgCtx->pBatchs = pMsgCtx->pBatchs; @@ -2584,6 +2600,7 @@ int32_t ctgLaunchJob(SCtgJob* pJob) { qDebug("QID:0x%" PRIx64 " ctg launch [%dth] task", pJob->queryId, pTask->taskId); CTG_ERR_RET((*gCtgAsyncFps[pTask->type].launchFp)(pTask)); + pTask = taosArrayGet(pJob->pTasks, i); pTask->status = CTG_TASK_LAUNCHED; } diff --git a/source/libs/catalog/src/ctgCache.c b/source/libs/catalog/src/ctgCache.c index 605f5efeb417d9c38afcef1ae25bff95ab73b330..44de83b7ef142c5cfc277854d196cde332b3895c 100644 --- a/source/libs/catalog/src/ctgCache.c +++ b/source/libs/catalog/src/ctgCache.c @@ -773,12 +773,6 @@ int32_t ctgGetCachedStbNameFromSuid(SCatalog* pCtg, char* dbFName, uint64_t suid int32_t ctgChkAuthFromCache(SCatalog *pCtg, SUserAuthInfo *pReq, bool *inCache, SCtgAuthRsp *pRes) { int32_t code = 0; - if (IS_SYS_DBNAME(pReq->tbName.dbname)) { - *inCache = true; - pRes->pRawRes->pass = true; - ctgDebug("sysdb %s, pass", pReq->tbName.dbname); - return TSDB_CODE_SUCCESS; - } SCtgUserAuth *pUser = (SCtgUserAuth *)taosHashGet(pCtg->userCache, pReq->user, strlen(pReq->user)); if (NULL == pUser) { diff --git a/source/libs/catalog/src/ctgUtil.c b/source/libs/catalog/src/ctgUtil.c index 86f6a51d9bcbdec9883b268db2856c7af7fa4f87..dab007aa47729e7e32f4550fca7f3d60e502d149 100644 --- a/source/libs/catalog/src/ctgUtil.c +++ b/source/libs/catalog/src/ctgUtil.c @@ -1589,6 +1589,12 @@ int32_t ctgChkSetAuthRes(SCatalog* pCtg, SCtgAuthReq* req, SCtgAuthRsp* res) { return TSDB_CODE_SUCCESS; } + if (IS_SYS_DBNAME(pReq->tbName.dbname)) { + pRes->pass = true; + ctgDebug("sysdb %s, pass", pReq->tbName.dbname); + return TSDB_CODE_SUCCESS; + } + char dbFName[TSDB_DB_FNAME_LEN]; tNameGetFullDbName(&pReq->tbName, dbFName); diff --git a/source/libs/command/src/command.c 
b/source/libs/command/src/command.c index 54352d0a53216febb87163bb41f4849a886500b9..8ddf730d5a884a31377bcb4278ac3927a67e9e31 100644 --- a/source/libs/command/src/command.c +++ b/source/libs/command/src/command.c @@ -802,15 +802,19 @@ static int32_t buildLocalVariablesResultDataBlock(SSDataBlock** pOutput) { pBlock->pDataBlock = taosArrayInit(SHOW_LOCAL_VARIABLES_RESULT_COLS, sizeof(SColumnInfoData)); SColumnInfoData infoData = {0}; + infoData.info.type = TSDB_DATA_TYPE_VARCHAR; infoData.info.bytes = SHOW_LOCAL_VARIABLES_RESULT_FIELD1_LEN; - taosArrayPush(pBlock->pDataBlock, &infoData); infoData.info.type = TSDB_DATA_TYPE_VARCHAR; infoData.info.bytes = SHOW_LOCAL_VARIABLES_RESULT_FIELD2_LEN; taosArrayPush(pBlock->pDataBlock, &infoData); + infoData.info.type = TSDB_DATA_TYPE_VARCHAR; + infoData.info.bytes = SHOW_LOCAL_VARIABLES_RESULT_FIELD3_LEN; + taosArrayPush(pBlock->pDataBlock, &infoData); + *pOutput = pBlock; return TSDB_CODE_SUCCESS; } @@ -823,6 +827,7 @@ int32_t setLocalVariablesResultIntoDataBlock(SSDataBlock* pBlock) { for (int32_t i = 0, c = 0; i < numOfCfg; ++i, c = 0) { SConfigItem* pItem = taosArrayGet(tsCfg->array, i); GRANT_CFG_SKIP; + char name[TSDB_CONFIG_OPTION_LEN + VARSTR_HEADER_SIZE] = {0}; STR_WITH_MAXSIZE_TO_VARSTR(name, pItem->name, TSDB_CONFIG_OPTION_LEN + VARSTR_HEADER_SIZE); SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, c++); @@ -835,6 +840,12 @@ int32_t setLocalVariablesResultIntoDataBlock(SSDataBlock* pBlock) { pColInfo = taosArrayGet(pBlock->pDataBlock, c++); colDataSetVal(pColInfo, i, value, false); + char scope[TSDB_CONFIG_SCOPE_LEN + VARSTR_HEADER_SIZE] = {0}; + cfgDumpItemScope(pItem, &scope[VARSTR_HEADER_SIZE], TSDB_CONFIG_SCOPE_LEN, &valueLen); + varDataSetLen(scope, valueLen); + pColInfo = taosArrayGet(pBlock->pDataBlock, c++); + colDataSetVal(pColInfo, i, scope, false); + numOfRows++; } diff --git a/source/libs/executor/inc/executorInt.h b/source/libs/executor/inc/executorInt.h index 
b3d0ff822506bccb52fa535980a17b0028e06212..9a917adf1b4b8a03b139d26e211b11e519698035 100644 --- a/source/libs/executor/inc/executorInt.h +++ b/source/libs/executor/inc/executorInt.h @@ -80,7 +80,6 @@ enum { STREAM_RECOVER_STEP__PREPARE1, STREAM_RECOVER_STEP__PREPARE2, STREAM_RECOVER_STEP__SCAN1, - STREAM_RECOVER_STEP__SCAN2, }; extern int32_t exchangeObjRefPool; @@ -232,19 +231,20 @@ typedef struct STableMergeScanInfo { int32_t tableEndIndex; bool hasGroupId; uint64_t groupId; - SArray* queryConds; // array of queryTableDataCond STableScanBase base; int32_t bufPageSize; uint32_t sortBufSize; // max buffer size for in-memory sort SArray* pSortInfo; SSortHandle* pSortHandle; SSDataBlock* pSortInputBlock; + SSDataBlock* pReaderBlock; int64_t startTs; // sort start time SArray* sortSourceParams; SLimitInfo limitInfo; int64_t numOfRows; SScanInfo scanInfo; int32_t scanTimes; + int32_t readIdx; SSDataBlock* pResBlock; SSampleExecInfo sample; // sample execution info SSortExecInfo sortExecInfo; @@ -366,7 +366,6 @@ typedef struct SStreamScanInfo { SNode* pTagIndexCond; // recover - int32_t blockRecoverContiCnt; int32_t blockRecoverTotCnt; SSDataBlock* pRecoverRes; diff --git a/source/libs/executor/inc/querytask.h b/source/libs/executor/inc/querytask.h index cdf37bcc6b5a9cd2a06f0398cd17675e2ce62531..7241b015a09321db59af5f212efae85af56959ca 100644 --- a/source/libs/executor/inc/querytask.h +++ b/source/libs/executor/inc/querytask.h @@ -62,8 +62,8 @@ typedef struct { SSchemaWrapper* schema; char tbName[TSDB_TABLE_NAME_LEN]; // this is the current scan table: todo refactor int8_t recoverStep; - bool recoverStep1Finished; - bool recoverStep2Finished; +// bool recoverStep1Finished; +// bool recoverStep2Finished; int8_t recoverScanFinished; SQueryTableDataCond tableCond; SVersionRange fillHistoryVer; diff --git a/source/libs/executor/inc/tsort.h b/source/libs/executor/inc/tsort.h index 627aa825c671ee0d87700f62b00e6b3c9fdfef4d..538a9f18f6012a110b52fa8e2d899f551dd88597 100644 --- 
a/source/libs/executor/inc/tsort.h +++ b/source/libs/executor/inc/tsort.h @@ -26,6 +26,7 @@ extern "C" { enum { SORT_MULTISOURCE_MERGE = 0x1, SORT_SINGLESOURCE_SORT = 0x2, + SORT_BLOCK_TS_MERGE = 0x3 }; typedef struct SMultiMergeSource { @@ -53,6 +54,12 @@ typedef struct SMsortComparParam { int32_t numOfSources; SArray* orderInfo; // SArray bool cmpGroupId; + + int32_t sortType; + // the following field to speed up when sortType == SORT_BLOCK_TS_MERGE + int32_t tsSlotId; + int32_t order; + __compar_fn_t cmpFn; } SMsortComparParam; typedef struct SSortHandle SSortHandle; @@ -70,8 +77,8 @@ typedef int32_t (*_sort_merge_compar_fn_t)(const void* p1, const void* p2, void* * @return */ SSortHandle* tsortCreateSortHandle(SArray* pOrderInfo, int32_t type, int32_t pageSize, int32_t numOfPages, - SSDataBlock* pBlock, const char* idstr, uint64_t maxRows, uint32_t maxTupleLength, - uint32_t sortBufSize); + SSDataBlock* pBlock, const char* idstr, uint64_t pqMaxRows, uint32_t pqMaxTupleLength, + uint32_t pqSortBufSize); void tsortSetForceUsePQSort(SSortHandle* pHandle); @@ -110,6 +117,10 @@ int32_t tsortSetFetchRawDataFp(SSortHandle* pHandle, _sort_fetch_block_fn_t fetc */ int32_t tsortSetComparFp(SSortHandle* pHandle, _sort_merge_compar_fn_t fp); +/** + * +*/ +void tsortSetMergeLimit(SSortHandle* pHandle, int64_t mergeLimit); /** * */ diff --git a/source/libs/executor/src/aggregateoperator.c b/source/libs/executor/src/aggregateoperator.c index be0ad1c2399c1b423bf6a09635d6334581c321fe..176c4b53be828a0ad356953793f722cc39d43893 100644 --- a/source/libs/executor/src/aggregateoperator.c +++ b/source/libs/executor/src/aggregateoperator.c @@ -45,6 +45,8 @@ typedef struct SAggOperatorInfo { SGroupResInfo groupResInfo; SExprSupp scalarExprSup; bool groupKeyOptimized; + bool hasValidBlock; + SSDataBlock* pNewGroupBlock; } SAggOperatorInfo; static void destroyAggOperatorInfo(void* param); @@ -53,7 +55,6 @@ static void setExecutionContext(SOperatorInfo* pOperator, int32_t numOfOutput, u 
static int32_t createDataBlockForEmptyInput(SOperatorInfo* pOperator, SSDataBlock** ppBlock); static void destroyDataBlockForEmptyInput(bool blockAllocated, SSDataBlock** ppBlock); -static int32_t doOpenAggregateOptr(SOperatorInfo* pOperator); static int32_t doAggregateImpl(SOperatorInfo* pOperator, SqlFunctionCtx* pCtx); static SSDataBlock* getAggregateResult(SOperatorInfo* pOperator); @@ -111,9 +112,9 @@ SOperatorInfo* createAggregateOperatorInfo(SOperatorInfo* downstream, SAggPhysiN pInfo->binfo.inputTsOrder = pAggNode->node.inputTsOrder; pInfo->binfo.outputTsOrder = pAggNode->node.outputTsOrder; - setOperatorInfo(pOperator, "TableAggregate", QUERY_NODE_PHYSICAL_PLAN_HASH_AGG, true, OP_NOT_OPENED, pInfo, - pTaskInfo); - pOperator->fpSet = createOperatorFpSet(doOpenAggregateOptr, getAggregateResult, NULL, destroyAggOperatorInfo, + setOperatorInfo(pOperator, "TableAggregate", QUERY_NODE_PHYSICAL_PLAN_HASH_AGG, + !pAggNode->node.forceCreateNonBlockingOptr, OP_NOT_OPENED, pInfo, pTaskInfo); + pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, getAggregateResult, NULL, destroyAggOperatorInfo, optrDefaultBufFn, NULL); if (downstream->operatorType == QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN) { @@ -153,28 +154,42 @@ void destroyAggOperatorInfo(void* param) { taosMemoryFreeClear(param); } -// this is a blocking operator -int32_t doOpenAggregateOptr(SOperatorInfo* pOperator) { - if (OPTR_IS_OPENED(pOperator)) { - return TSDB_CODE_SUCCESS; - } - +/** + * @brief get blocks from downstream and fill results into groupedRes after aggragation + * @retval false if no more groups + * @retval true if there could have new groups coming + * @note if pOperator.blocking is true, scan all blocks from downstream, all groups are handled + * if false, fill results of ONE GROUP + * */ +static bool nextGroupedResult(SOperatorInfo* pOperator) { SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; SAggOperatorInfo* pAggInfo = pOperator->info; + if (pOperator->blocking && pAggInfo->hasValidBlock) 
return false; + SExprSupp* pSup = &pOperator->exprSupp; SOperatorInfo* downstream = pOperator->pDownstream[0]; - int64_t st = taosGetTimestampUs(); - int32_t code = TSDB_CODE_SUCCESS; - int32_t order = pAggInfo->binfo.inputTsOrder; - bool hasValidBlock = false; + int64_t st = taosGetTimestampUs(); + int32_t code = TSDB_CODE_SUCCESS; + int32_t order = pAggInfo->binfo.inputTsOrder; + SSDataBlock* pBlock = pAggInfo->pNewGroupBlock; + if (pBlock) { + pAggInfo->pNewGroupBlock = NULL; + tSimpleHashClear(pAggInfo->aggSup.pResultRowHashTable); + setExecutionContext(pOperator, pOperator->exprSupp.numOfExprs, pBlock->info.id.groupId); + setInputDataBlock(pSup, pBlock, order, pBlock->info.scanFlag, true); + code = doAggregateImpl(pOperator, pSup->pCtx); + if (code != TSDB_CODE_SUCCESS) { + T_LONG_JMP(pTaskInfo->env, code); + } + } while (1) { bool blockAllocated = false; - SSDataBlock* pBlock = downstream->fpSet.getNextFn(downstream); + pBlock = downstream->fpSet.getNextFn(downstream); if (pBlock == NULL) { - if (!hasValidBlock) { + if (!pAggInfo->hasValidBlock) { createDataBlockForEmptyInput(pOperator, &pBlock); if (pBlock == NULL) { break; @@ -184,7 +199,7 @@ int32_t doOpenAggregateOptr(SOperatorInfo* pOperator) { break; } } - hasValidBlock = true; + pAggInfo->hasValidBlock = true; pAggInfo->binfo.pRes->info.scanFlag = pBlock->info.scanFlag; // there is an scalar expression that needs to be calculated before apply the group aggregation. 
@@ -196,7 +211,11 @@ int32_t doOpenAggregateOptr(SOperatorInfo* pOperator) { T_LONG_JMP(pTaskInfo->env, code); } } - + // if non-blocking mode and new group arrived, save the block and break + if (!pOperator->blocking && pAggInfo->groupId != UINT64_MAX && pBlock->info.id.groupId != pAggInfo->groupId) { + pAggInfo->pNewGroupBlock = pBlock; + break; + } // the pDataBlock are always the same one, no need to call this again setExecutionContext(pOperator, pOperator->exprSupp.numOfExprs, pBlock->info.id.groupId); setInputDataBlock(pSup, pBlock, order, pBlock->info.scanFlag, true); @@ -215,10 +234,7 @@ int32_t doOpenAggregateOptr(SOperatorInfo* pOperator) { } initGroupedResultInfo(&pAggInfo->groupResInfo, pAggInfo->aggSup.pResultRowHashTable, 0); - OPTR_SET_OPENED(pOperator); - - pOperator->cost.openCost = (taosGetTimestampUs() - st) / 1000.0; - return pTaskInfo->code; + return pBlock != NULL; } SSDataBlock* getAggregateResult(SOperatorInfo* pOperator) { @@ -230,26 +246,25 @@ SSDataBlock* getAggregateResult(SOperatorInfo* pOperator) { } SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - pTaskInfo->code = pOperator->fpSet._openFn(pOperator); - if (pTaskInfo->code != TSDB_CODE_SUCCESS) { - setOperatorCompleted(pOperator); - return NULL; - } + bool hasNewGroups = false; + do { + hasNewGroups = nextGroupedResult(pOperator); + blockDataEnsureCapacity(pInfo->pRes, pOperator->resultInfo.capacity); - blockDataEnsureCapacity(pInfo->pRes, pOperator->resultInfo.capacity); - while (1) { - doBuildResultDatablock(pOperator, pInfo, &pAggInfo->groupResInfo, pAggInfo->aggSup.pResultBuf); - doFilter(pInfo->pRes, pOperator->exprSupp.pFilterInfo, NULL); + while (1) { + doBuildResultDatablock(pOperator, pInfo, &pAggInfo->groupResInfo, pAggInfo->aggSup.pResultBuf); + doFilter(pInfo->pRes, pOperator->exprSupp.pFilterInfo, NULL); - if (!hasRemainResults(&pAggInfo->groupResInfo)) { - setOperatorCompleted(pOperator); - break; - } + if (!hasRemainResults(&pAggInfo->groupResInfo)) { + if 
(!hasNewGroups) setOperatorCompleted(pOperator); + break; + } - if (pInfo->pRes->info.rows > 0) { - break; + if (pInfo->pRes->info.rows > 0) { + break; + } } - } + } while (pInfo->pRes->info.rows == 0 && hasNewGroups); size_t rows = blockDataGetNumOfRows(pInfo->pRes); pOperator->resultInfo.totalRows += rows; diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index 832750e967dfd6f996d12281a9ece48b6c6d26c7..e1bf4e7cb0e3b3e2df8cc7931d57c153a952b78b 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -127,6 +127,10 @@ void initGroupedResultInfo(SGroupResInfo* pGroupResInfo, SSHashObj* pHashmap, in if (pGroupResInfo->pRows != NULL) { taosArrayDestroy(pGroupResInfo->pRows); } + if (pGroupResInfo->pBuf) { + taosMemoryFree(pGroupResInfo->pBuf); + pGroupResInfo->pBuf = NULL; + } // extract the result rows information from the hash map int32_t size = tSimpleHashGetSize(pHashmap); @@ -2104,6 +2108,8 @@ int32_t buildGroupIdMapForAllTables(STableListInfo* pTableListInfo, SReadHandle* if (groupSort && groupByTbname) { taosArraySort(pTableListInfo->pTableList, orderbyGroupIdComparFn); pTableListInfo->numOfOuputGroups = numOfTables; + } else if (groupByTbname && pScanNode->groupOrderScan){ + pTableListInfo->numOfOuputGroups = numOfTables; } else { pTableListInfo->numOfOuputGroups = 1; } diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 06b90d0a516729f77a20c008c81aa828285e557e..05767db2869e429a031fcc8a09ba82b6f9a6309a 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -116,16 +116,6 @@ void resetTaskInfo(qTaskInfo_t tinfo) { clearStreamBlock(pTaskInfo->pRoot); } -void qResetStreamInfoTimeWindow(qTaskInfo_t tinfo) { - SExecTaskInfo* pTaskInfo = (SExecTaskInfo*) tinfo; - if (pTaskInfo == NULL) { - return; - } - - qDebug("%s set fill history start key:%"PRId64, GET_TASKID(pTaskInfo), INT64_MIN); - 
pTaskInfo->streamInfo.fillHistoryWindow.skey = INT64_MIN; -} - static int32_t doSetStreamBlock(SOperatorInfo* pOperator, void* input, size_t numOfBlocks, int32_t type, const char* id) { if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) { if (pOperator->numOfDownstream == 0) { @@ -196,11 +186,6 @@ void qSetTaskId(qTaskInfo_t tinfo, uint64_t taskId, uint64_t queryId) { doSetTaskId(pTaskInfo->pRoot, &pTaskInfo->storageAPI); } -//void qSetTaskCode(qTaskInfo_t tinfo, int32_t code) { -// SExecTaskInfo* pTaskInfo = tinfo; -// pTaskInfo->code = code; -//} - int32_t qSetStreamOpOpen(qTaskInfo_t tinfo) { if (tinfo == NULL) { return TSDB_CODE_APP_ERROR; @@ -340,6 +325,7 @@ qTaskInfo_t qCreateStreamExecTaskInfo(void* msg, SReadHandle* readers, int32_t v return NULL; } + qStreamInfoResetTimewindowFilter(pTaskInfo); return pTaskInfo; } @@ -661,23 +647,33 @@ int32_t qExecTask(qTaskInfo_t tinfo, SSDataBlock** pRes, uint64_t* useconds) { *pRes = NULL; int64_t curOwner = 0; - if ((curOwner = atomic_val_compare_exchange_64(&pTaskInfo->owner, 0, threadId)) != 0) { + + // todo extract method + taosRLockLatch(&pTaskInfo->lock); + bool isKilled = isTaskKilled(pTaskInfo); + if (isKilled) { + clearStreamBlock(pTaskInfo->pRoot); + qDebug("%s already killed, abort", GET_TASKID(pTaskInfo)); + + taosRUnLockLatch(&pTaskInfo->lock); + return TSDB_CODE_SUCCESS; + } + + if (pTaskInfo->owner != 0) { qError("%s-%p execTask is now executed by thread:%p", GET_TASKID(pTaskInfo), pTaskInfo, (void*)curOwner); pTaskInfo->code = TSDB_CODE_QRY_IN_EXEC; + + taosRUnLockLatch(&pTaskInfo->lock); return pTaskInfo->code; } + pTaskInfo->owner = threadId; + taosRUnLockLatch(&pTaskInfo->lock); + if (pTaskInfo->cost.start == 0) { pTaskInfo->cost.start = taosGetTimestampUs(); } - if (isTaskKilled(pTaskInfo)) { - clearStreamBlock(pTaskInfo->pRoot); - atomic_store_64(&pTaskInfo->owner, 0); - qDebug("%s already killed, abort", GET_TASKID(pTaskInfo)); - return TSDB_CODE_SUCCESS; - } - // error occurs, 
record the error code and return to client int32_t ret = setjmp(pTaskInfo->env); if (ret != TSDB_CODE_SUCCESS) { @@ -781,11 +777,13 @@ int32_t qKillTask(qTaskInfo_t tinfo, int32_t rspCode) { qDebug("%s sync killed execTask", GET_TASKID(pTaskInfo)); setTaskKilled(pTaskInfo, TSDB_CODE_TSC_QUERY_KILLED); + taosWLockLatch(&pTaskInfo->lock); while (qTaskIsExecuting(pTaskInfo)) { taosMsleep(10); } - pTaskInfo->code = rspCode; + taosWUnLockLatch(&pTaskInfo->lock); + return TSDB_CODE_SUCCESS; } @@ -889,10 +887,8 @@ int32_t qStreamSourceScanParamForHistoryScanStep1(qTaskInfo_t tinfo, SVersionRan pStreamInfo->fillHistoryVer = *pVerRange; pStreamInfo->fillHistoryWindow = *pWindow; pStreamInfo->recoverStep = STREAM_RECOVER_STEP__PREPARE1; - pStreamInfo->recoverStep1Finished = false; - pStreamInfo->recoverStep2Finished = false; - qDebug("%s step 1. set param for stream scanner for scan history data, verRange:%" PRId64 " - %" PRId64 ", window:%" PRId64 + qDebug("%s step 1. set param for stream scanner for scan-history data, verRange:%" PRId64 " - %" PRId64 ", window:%" PRId64 " - %" PRId64, GET_TASKID(pTaskInfo), pStreamInfo->fillHistoryVer.minVer, pStreamInfo->fillHistoryVer.maxVer, pWindow->skey, pWindow->ekey); @@ -908,10 +904,8 @@ int32_t qStreamSourceScanParamForHistoryScanStep2(qTaskInfo_t tinfo, SVersionRan pStreamInfo->fillHistoryVer = *pVerRange; pStreamInfo->fillHistoryWindow = *pWindow; pStreamInfo->recoverStep = STREAM_RECOVER_STEP__PREPARE2; - pStreamInfo->recoverStep1Finished = true; - pStreamInfo->recoverStep2Finished = false; - qDebug("%s step 2. set param for stream scanner for scan history data, verRange:%" PRId64 " - %" PRId64 + qDebug("%s step 2. 
set param for stream scanner for scan-history data, verRange:%" PRId64 " - %" PRId64 ", window:%" PRId64 " - %" PRId64, GET_TASKID(pTaskInfo), pStreamInfo->fillHistoryVer.minVer, pStreamInfo->fillHistoryVer.maxVer, pWindow->skey, pWindow->ekey); @@ -1048,23 +1042,15 @@ bool qStreamRecoverScanFinished(qTaskInfo_t tinfo) { return pTaskInfo->streamInfo.recoverScanFinished; } -bool qStreamRecoverScanStep1Finished(qTaskInfo_t tinfo) { +int32_t qStreamInfoResetTimewindowFilter(qTaskInfo_t tinfo) { SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; - return pTaskInfo->streamInfo.recoverStep1Finished; -} + STimeWindow* pWindow = &pTaskInfo->streamInfo.fillHistoryWindow; -bool qStreamRecoverScanStep2Finished(qTaskInfo_t tinfo) { - SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; - return pTaskInfo->streamInfo.recoverStep2Finished; -} - -int32_t qStreamRecoverSetAllStepFinished(qTaskInfo_t tinfo) { - SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; - pTaskInfo->streamInfo.recoverStep1Finished = true; - pTaskInfo->streamInfo.recoverStep2Finished = true; + qDebug("%s set remove scan-history filter window:%" PRId64 "-%" PRId64 ", new window:%" PRId64 "-%" PRId64, + GET_TASKID(pTaskInfo), pWindow->skey, pWindow->ekey, INT64_MIN, INT64_MAX); - // reset the time window - pTaskInfo->streamInfo.fillHistoryWindow.skey = INT64_MIN; + pWindow->skey = INT64_MIN; + pWindow->ekey = INT64_MAX; return 0; } @@ -1171,8 +1157,8 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subT SStoreTqReader* pReaderAPI = &pTaskInfo->storageAPI.tqReaderFn; SWalReader* pWalReader = pReaderAPI->tqReaderGetWalReader(pInfo->tqReader); walReaderVerifyOffset(pWalReader, pOffset); - if (pReaderAPI->tqReaderSeek(pInfo->tqReader, pOffset->version + 1, id) < 0) { - qError("tqReaderSeek failed ver:%" PRId64 ", %s", pOffset->version + 1, id); + if (pReaderAPI->tqReaderSeek(pInfo->tqReader, pOffset->version, id) < 0) { + qError("tqReaderSeek failed ver:%" PRId64 ", %s", pOffset->version, 
id); return -1; } } else if (pOffset->type == TMQ_OFFSET__SNAPSHOT_DATA) { @@ -1261,7 +1247,7 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subT SOperatorInfo* p = extractOperatorInTree(pOperator, QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN, id); STableListInfo* pTableListInfo = ((SStreamRawScanInfo*)(p->info))->pTableListInfo; - if (pAPI->snapshotFn.createSnapshot(sContext, pOffset->uid) != 0) { + if (pAPI->snapshotFn.setForSnapShot(sContext, pOffset->uid) != 0) { qError("setDataForSnapShot error. uid:%" PRId64 " , %s", pOffset->uid, id); terrno = TSDB_CODE_PAR_INTERNAL_ERROR; return -1; @@ -1298,7 +1284,7 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subT } else if (pOffset->type == TMQ_OFFSET__SNAPSHOT_META) { SStreamRawScanInfo* pInfo = pOperator->info; SSnapContext* sContext = pInfo->sContext; - if (pTaskInfo->storageAPI.snapshotFn.createSnapshot(sContext, pOffset->uid) != 0) { + if (pTaskInfo->storageAPI.snapshotFn.setForSnapShot(sContext, pOffset->uid) != 0) { qError("setForSnapShot error. uid:%" PRIu64 " ,version:%" PRId64, pOffset->uid, pOffset->version); terrno = TSDB_CODE_PAR_INTERNAL_ERROR; return -1; diff --git a/source/libs/executor/src/filloperator.c b/source/libs/executor/src/filloperator.c index 7798ded61bd310e5e447a76693f6b58bf5e2fc4f..80c88a803effb72588097c4b86acd808dde78354 100644 --- a/source/libs/executor/src/filloperator.c +++ b/source/libs/executor/src/filloperator.c @@ -502,9 +502,13 @@ void* destroyStreamFillSupporter(SStreamFillSupporter* pFillSup) { pFillSup->pAllColInfo = destroyFillColumnInfo(pFillSup->pAllColInfo, pFillSup->numOfFillCols, pFillSup->numOfAllCols); tSimpleHashCleanup(pFillSup->pResMap); pFillSup->pResMap = NULL; - releaseOutputBuf(NULL, NULL, (SResultRow*)pFillSup->cur.pRowVal, &pFillSup->pAPI->stateStore); //????? 
- pFillSup->cur.pRowVal = NULL; cleanupExprSupp(&pFillSup->notFillExprSup); + if (pFillSup->cur.pRowVal != pFillSup->prev.pRowVal && pFillSup->cur.pRowVal != pFillSup->next.pRowVal) { + taosMemoryFree(pFillSup->cur.pRowVal); + } + taosMemoryFree(pFillSup->prev.pRowVal); + taosMemoryFree(pFillSup->next.pRowVal); + taosMemoryFree(pFillSup->nextNext.pRowVal); taosMemoryFree(pFillSup); return NULL; @@ -546,13 +550,17 @@ static void destroyStreamFillOperatorInfo(void* param) { static void resetFillWindow(SResultRowData* pRowData) { pRowData->key = INT64_MIN; - pRowData->pRowVal = NULL; + taosMemoryFreeClear(pRowData->pRowVal); } void resetPrevAndNextWindow(SStreamFillSupporter* pFillSup, void* pState, SStorageAPI* pAPI) { + if (pFillSup->cur.pRowVal != pFillSup->prev.pRowVal && pFillSup->cur.pRowVal != pFillSup->next.pRowVal) { + resetFillWindow(&pFillSup->cur); + } else { + pFillSup->cur.key = INT64_MIN; + pFillSup->cur.pRowVal = NULL; + } resetFillWindow(&pFillSup->prev); - releaseOutputBuf(NULL, NULL, (SResultRow*)pFillSup->cur.pRowVal, &pAPI->stateStore); //??? 
- resetFillWindow(&pFillSup->cur); resetFillWindow(&pFillSup->next); resetFillWindow(&pFillSup->nextNext); } @@ -1513,11 +1521,11 @@ SOperatorInfo* createStreamFillOperatorInfo(SOperatorInfo* downstream, SStreamFi float v = 0; GET_TYPED_DATA(v, float, pVar->nType, &pVar->i); SET_TYPED_DATA(pCell->pData, pCell->type, v); - } else if (pCell->type == TSDB_DATA_TYPE_DOUBLE) { + } else if (IS_FLOAT_TYPE(pCell->type)) { double v = 0; GET_TYPED_DATA(v, double, pVar->nType, &pVar->i); SET_TYPED_DATA(pCell->pData, pCell->type, v); - } else if (IS_SIGNED_NUMERIC_TYPE(pCell->type)) { + } else if (IS_INTEGER_TYPE(pCell->type)) { int64_t v = 0; GET_TYPED_DATA(v, int64_t, pVar->nType, &pVar->i); SET_TYPED_DATA(pCell->pData, pCell->type, v); diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 9abe4ffef6695c3ae29c9d8a6d10be08d58faedf..da4bd1e23cfcbb8fe90afc24137d7aa03922098a 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -13,8 +13,6 @@ * along with this program. If not, see . 
*/ -// clang-format off - #include "executorInt.h" #include "filter.h" #include "function.h" @@ -55,8 +53,7 @@ typedef struct STableMergeScanSortSourceParam { SOperatorInfo* pOperator; int32_t readerIdx; uint64_t uid; - SSDataBlock* inputBlock; - STsdbReader* dataReader; + STsdbReader* reader; } STableMergeScanSortSourceParam; typedef struct STableCountScanOperatorInfo { @@ -1553,10 +1550,99 @@ static void checkUpdateData(SStreamScanInfo* pInfo, bool invertible, SSDataBlock } } -static int32_t setBlockIntoRes(SStreamScanInfo* pInfo, const SSDataBlock* pBlock, bool filter) { +static void doBlockDataWindowFilter(SSDataBlock* pBlock, int32_t tsIndex, STimeWindow* pWindow, const char* id) { + if (pWindow->skey != INT64_MIN || pWindow->ekey != INT64_MAX) { + bool* p = taosMemoryCalloc(pBlock->info.rows, sizeof(bool)); + bool hasUnqualified = false; + + SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, tsIndex); + + if (pWindow->skey != INT64_MIN) { + qDebug("%s filter for additional history window, skey:%" PRId64, id, pWindow->skey); + + ASSERT(pCol->pData != NULL); + for (int32_t i = 0; i < pBlock->info.rows; ++i) { + int64_t* ts = (int64_t*)colDataGetData(pCol, i); + p[i] = (*ts >= pWindow->skey); + + if (!p[i]) { + hasUnqualified = true; + } + } + } else if (pWindow->ekey != INT64_MAX) { + qDebug("%s filter for additional history window, ekey:%" PRId64, id, pWindow->ekey); + for (int32_t i = 0; i < pBlock->info.rows; ++i) { + int64_t* ts = (int64_t*)colDataGetData(pCol, i); + p[i] = (*ts <= pWindow->ekey); + + if (!p[i]) { + hasUnqualified = true; + } + } + } + + if (hasUnqualified) { + trimDataBlock(pBlock, pBlock->info.rows, p); + } + + taosMemoryFree(p); + } +} + +// re-build the delete block, ONLY according to the split timestamp +static void rebuildDeleteBlockData(SSDataBlock* pBlock, STimeWindow* pWindow, const char* id) { + int32_t numOfRows = pBlock->info.rows; + bool* p = taosMemoryCalloc(numOfRows, sizeof(bool)); + bool hasUnqualified = false; + 
int64_t skey = pWindow->skey; + int64_t ekey = pWindow->ekey; + + SColumnInfoData* pSrcStartCol = taosArrayGet(pBlock->pDataBlock, START_TS_COLUMN_INDEX); + uint64_t* tsStartCol = (uint64_t*)pSrcStartCol->pData; + SColumnInfoData* pSrcEndCol = taosArrayGet(pBlock->pDataBlock, END_TS_COLUMN_INDEX); + uint64_t* tsEndCol = (uint64_t*)pSrcEndCol->pData; + + if (pWindow->skey != INT64_MIN) { + for (int32_t i = 0; i < numOfRows; i++) { + if (tsStartCol[i] < skey) { + tsStartCol[i] = skey; + } + + if (tsEndCol[i] >= skey) { + p[i] = true; + } else { // this row should be removed, since it is not in this query time window, which is [skey, INT64_MAX] + hasUnqualified = true; + } + } + } else if (pWindow->ekey != INT64_MAX) { + for(int32_t i = 0; i < numOfRows; ++i) { + if (tsEndCol[i] > ekey) { + tsEndCol[i] = ekey; + } + + if (tsStartCol[i] <= ekey) { + p[i] = true; + } else { + hasUnqualified = true; + } + } + } + + if (hasUnqualified) { + trimDataBlock(pBlock, pBlock->info.rows, p); + qDebug("%s re-build delete datablock, start key revised to:%"PRId64", rows:%"PRId64, id, skey, pBlock->info.rows); + } else { + qDebug("%s not update the delete block", id); + } + + taosMemoryFree(p); +} + +static int32_t setBlockIntoRes(SStreamScanInfo* pInfo, const SSDataBlock* pBlock, STimeWindow* pTimeWindow, bool filter) { SDataBlockInfo* pBlockInfo = &pInfo->pRes->info; SOperatorInfo* pOperator = pInfo->pStreamScanOp; SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + const char* id = GET_TASKID(pTaskInfo); blockDataEnsureCapacity(pInfo->pRes, pBlock->info.rows); @@ -1596,7 +1682,7 @@ static int32_t setBlockIntoRes(SStreamScanInfo* pInfo, const SSDataBlock* pBlock // currently only the tbname pseudo column if (pInfo->numOfPseudoExpr > 0) { int32_t code = addTagPseudoColumnData(&pInfo->readHandle, pInfo->pPseudoExpr, pInfo->numOfPseudoExpr, pInfo->pRes, - pBlockInfo->rows, GET_TASKID(pTaskInfo), &pTableScanInfo->base.metaCache); + pBlockInfo->rows, id, &pTableScanInfo->base.metaCache); 
// ignore the table not exists error, since this table may have been dropped during the scan procedure. if (code != TSDB_CODE_SUCCESS && code != TSDB_CODE_PAR_TABLE_NOT_EXIST) { blockDataFreeRes((SSDataBlock*)pBlock); @@ -1611,8 +1697,14 @@ static int32_t setBlockIntoRes(SStreamScanInfo* pInfo, const SSDataBlock* pBlock doFilter(pInfo->pRes, pOperator->exprSupp.pFilterInfo, NULL); } + // filter the block extracted from WAL files, according to the time window apply additional time window filter + doBlockDataWindowFilter(pInfo->pRes, pInfo->primaryTsIndex, pTimeWindow, id); pInfo->pRes->info.dataLoad = 1; + blockDataUpdateTsWindow(pInfo->pRes, pInfo->primaryTsIndex); + if (pInfo->pRes->info.rows == 0) { + return 0; + } calBlockTbName(pInfo, pInfo->pRes); return 0; @@ -1645,12 +1737,13 @@ static SSDataBlock* doQueueScan(SOperatorInfo* pOperator) { pAPI->tsdReader.tsdReaderClose(pTSInfo->base.dataReader); pTSInfo->base.dataReader = NULL; - qDebug("queue scan tsdb over, switch to wal ver %" PRId64 "", pTaskInfo->streamInfo.snapshotVer + 1); - if (pAPI->tqReaderFn.tqReaderSeek(pInfo->tqReader, pTaskInfo->streamInfo.snapshotVer + 1, pTaskInfo->id.str) < 0) { + int64_t validVer = pTaskInfo->streamInfo.snapshotVer + 1; + qDebug("queue scan tsdb over, switch to wal ver %" PRId64 "", validVer); + if (pAPI->tqReaderFn.tqReaderSeek(pInfo->tqReader, validVer, pTaskInfo->id.str) < 0) { return NULL; } - tqOffsetResetToLog(&pTaskInfo->streamInfo.currentOffset, pTaskInfo->streamInfo.snapshotVer); + tqOffsetResetToLog(&pTaskInfo->streamInfo.currentOffset, validVer); } if (pTaskInfo->streamInfo.currentOffset.type == TMQ_OFFSET__LOG) { @@ -1661,14 +1754,15 @@ static SSDataBlock* doQueueScan(SOperatorInfo* pOperator) { SSDataBlock* pRes = pAPI->tqReaderFn.tqGetResultBlock(pInfo->tqReader); struct SWalReader* pWalReader = pAPI->tqReaderFn.tqReaderGetWalReader(pInfo->tqReader); - // curVersion move to next, so currentOffset = curVersion - 1 - 
tqOffsetResetToLog(&pTaskInfo->streamInfo.currentOffset, pWalReader->curVersion - 1); + // curVersion move to next + tqOffsetResetToLog(&pTaskInfo->streamInfo.currentOffset, pWalReader->curVersion); if (hasResult) { qDebug("doQueueScan get data from log %" PRId64 " rows, version:%" PRId64, pRes->info.rows, pTaskInfo->streamInfo.currentOffset.version); blockDataCleanup(pInfo->pRes); - setBlockIntoRes(pInfo, pRes, true); + STimeWindow defaultWindow = {.skey = INT64_MIN, .ekey = INT64_MAX}; + setBlockIntoRes(pInfo, pRes, &defaultWindow, true); if (pInfo->pRes->info.rows > 0) { return pInfo->pRes; } @@ -1800,12 +1894,15 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { qDebug("stream recover step1, verRange:%" PRId64 "-%" PRId64 " window:%"PRId64"-%"PRId64", %s", pTSInfo->base.cond.startVersion, pTSInfo->base.cond.endVersion, pTSInfo->base.cond.twindows.skey, pTSInfo->base.cond.twindows.ekey, id); pStreamInfo->recoverStep = STREAM_RECOVER_STEP__SCAN1; + pStreamInfo->recoverScanFinished = false; } else { pTSInfo->base.cond.startVersion = pStreamInfo->fillHistoryVer.minVer; pTSInfo->base.cond.endVersion = pStreamInfo->fillHistoryVer.maxVer; - qDebug("stream recover step2, verRange:%" PRId64 " - %" PRId64", %s", pTSInfo->base.cond.startVersion, - pTSInfo->base.cond.endVersion, id); - pStreamInfo->recoverStep = STREAM_RECOVER_STEP__SCAN2; + pTSInfo->base.cond.twindows = pStreamInfo->fillHistoryWindow; + qDebug("stream recover step2, verRange:%" PRId64 " - %" PRId64 ", window:%" PRId64 "-%" PRId64 ", %s", + pTSInfo->base.cond.startVersion, pTSInfo->base.cond.endVersion, pTSInfo->base.cond.twindows.skey, + pTSInfo->base.cond.twindows.ekey, id); + pStreamInfo->recoverStep = STREAM_RECOVER_STEP__NONE; } pAPI->tsdReader.tsdReaderClose(pTSInfo->base.dataReader); @@ -1815,14 +1912,10 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { pTSInfo->scanTimes = 0; pTSInfo->currentGroupId = -1; - pStreamInfo->recoverScanFinished = false; } - if 
(pStreamInfo->recoverStep == STREAM_RECOVER_STEP__SCAN1 || - pStreamInfo->recoverStep == STREAM_RECOVER_STEP__SCAN2) { - if (pInfo->blockRecoverContiCnt > 100) { - pInfo->blockRecoverTotCnt += pInfo->blockRecoverContiCnt; - pInfo->blockRecoverContiCnt = 0; + if (pStreamInfo->recoverStep == STREAM_RECOVER_STEP__SCAN1) { + if (isTaskKilled(pTaskInfo)) { return NULL; } @@ -1832,51 +1925,50 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { printDataBlock(pInfo->pRecoverRes, "scan recover"); return pInfo->pRecoverRes; } break; - case STREAM_SCAN_FROM_UPDATERES: { - generateScanRange(pInfo, pInfo->pUpdateDataRes, pInfo->pUpdateRes); - prepareRangeScan(pInfo, pInfo->pUpdateRes, &pInfo->updateResIndex); - pInfo->scanMode = STREAM_SCAN_FROM_DATAREADER_RANGE; - printDataBlock(pInfo->pUpdateRes, "recover update"); - return pInfo->pUpdateRes; - } break; - case STREAM_SCAN_FROM_DELETE_DATA: { - generateScanRange(pInfo, pInfo->pUpdateDataRes, pInfo->pUpdateRes); - prepareRangeScan(pInfo, pInfo->pUpdateRes, &pInfo->updateResIndex); - pInfo->scanMode = STREAM_SCAN_FROM_DATAREADER_RANGE; - copyDataBlock(pInfo->pDeleteDataRes, pInfo->pUpdateRes); - pInfo->pDeleteDataRes->info.type = STREAM_DELETE_DATA; - printDataBlock(pInfo->pDeleteDataRes, "recover delete"); - return pInfo->pDeleteDataRes; - } break; - case STREAM_SCAN_FROM_DATAREADER_RANGE: { - SSDataBlock* pSDB = doRangeScan(pInfo, pInfo->pUpdateRes, pInfo->primaryTsIndex, &pInfo->updateResIndex); - if (pSDB) { - STableScanInfo* pTableScanInfo = pInfo->pTableScanOp->info; - pSDB->info.type = pInfo->scanMode == STREAM_SCAN_FROM_DATAREADER_RANGE ? 
STREAM_NORMAL : STREAM_PULL_DATA; - checkUpdateData(pInfo, true, pSDB, false); - printDataBlock(pSDB, "scan recover update"); - calBlockTbName(pInfo, pSDB); - return pSDB; - } - blockDataCleanup(pInfo->pUpdateDataRes); - pInfo->scanMode = STREAM_SCAN_FROM_READERHANDLE; - } break; + // case STREAM_SCAN_FROM_UPDATERES: { + // generateScanRange(pInfo, pInfo->pUpdateDataRes, pInfo->pUpdateRes); + // prepareRangeScan(pInfo, pInfo->pUpdateRes, &pInfo->updateResIndex); + // pInfo->scanMode = STREAM_SCAN_FROM_DATAREADER_RANGE; + // printDataBlock(pInfo->pUpdateRes, "recover update"); + // return pInfo->pUpdateRes; + // } break; + // case STREAM_SCAN_FROM_DELETE_DATA: { + // generateScanRange(pInfo, pInfo->pUpdateDataRes, pInfo->pUpdateRes); + // prepareRangeScan(pInfo, pInfo->pUpdateRes, &pInfo->updateResIndex); + // pInfo->scanMode = STREAM_SCAN_FROM_DATAREADER_RANGE; + // copyDataBlock(pInfo->pDeleteDataRes, pInfo->pUpdateRes); + // pInfo->pDeleteDataRes->info.type = STREAM_DELETE_DATA; + // printDataBlock(pInfo->pDeleteDataRes, "recover delete"); + // return pInfo->pDeleteDataRes; + // } break; + // case STREAM_SCAN_FROM_DATAREADER_RANGE: { + // SSDataBlock* pSDB = doRangeScan(pInfo, pInfo->pUpdateRes, pInfo->primaryTsIndex, &pInfo->updateResIndex); + // if (pSDB) { + // STableScanInfo* pTableScanInfo = pInfo->pTableScanOp->info; + // pSDB->info.type = pInfo->scanMode == STREAM_SCAN_FROM_DATAREADER_RANGE ? 
STREAM_NORMAL : STREAM_PULL_DATA; + // checkUpdateData(pInfo, true, pSDB, false); + // printDataBlock(pSDB, "scan recover update"); + // calBlockTbName(pInfo, pSDB); + // return pSDB; + // } + // blockDataCleanup(pInfo->pUpdateDataRes); + // pInfo->scanMode = STREAM_SCAN_FROM_READERHANDLE; + // } break; default: break; } pInfo->pRecoverRes = doTableScan(pInfo->pTableScanOp); if (pInfo->pRecoverRes != NULL) { - pInfo->blockRecoverContiCnt++; calBlockTbName(pInfo, pInfo->pRecoverRes); if (!pInfo->igCheckUpdate && pInfo->pUpdateInfo) { - if (pStreamInfo->recoverStep == STREAM_RECOVER_STEP__SCAN1) { - TSKEY maxTs = pAPI->stateStore.updateInfoFillBlockData(pInfo->pUpdateInfo, pInfo->pRecoverRes, pInfo->primaryTsIndex); - pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, maxTs); - } else { - pInfo->pUpdateInfo->maxDataVersion = TMAX(pInfo->pUpdateInfo->maxDataVersion, pStreamInfo->fillHistoryVer.maxVer); - doCheckUpdate(pInfo, pInfo->pRecoverRes->info.window.ekey, pInfo->pRecoverRes); - } + // if (pStreamInfo->recoverStep == STREAM_RECOVER_STEP__SCAN1) { + TSKEY maxTs = pAPI->stateStore.updateInfoFillBlockData(pInfo->pUpdateInfo, pInfo->pRecoverRes, pInfo->primaryTsIndex); + pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, maxTs); + // } else { + // pInfo->pUpdateInfo->maxDataVersion = TMAX(pInfo->pUpdateInfo->maxDataVersion, pStreamInfo->fillHistoryVer.maxVer); + // doCheckUpdate(pInfo, pInfo->pRecoverRes->info.window.ekey, pInfo->pRecoverRes); + // } } if (pInfo->pCreateTbRes->info.rows > 0) { pInfo->scanMode = STREAM_SCAN_FROM_RES; @@ -1926,6 +2018,7 @@ FETCH_NEXT_BLOCK: if (pInfo->pUpdateInfo) { pInfo->pUpdateInfo->maxDataVersion = TMAX(pInfo->pUpdateInfo->maxDataVersion, pBlock->info.version); } + blockDataUpdateTsWindow(pBlock, 0); switch (pBlock->info.type) { case STREAM_NORMAL: @@ -1948,7 +2041,9 @@ FETCH_NEXT_BLOCK: } else { pDelBlock = pBlock; } + setBlockGroupIdByUid(pInfo, pDelBlock); + rebuildDeleteBlockData(pDelBlock, &pStreamInfo->fillHistoryWindow, 
id); printDataBlock(pDelBlock, "stream scan delete recv filtered"); if (pDelBlock->info.rows == 0) { if (pInfo->tqReader) { @@ -1956,6 +2051,7 @@ FETCH_NEXT_BLOCK: } goto FETCH_NEXT_BLOCK; } + if (!isIntervalWindow(pInfo) && !isSessionWindow(pInfo) && !isStateWindow(pInfo)) { generateDeleteResultBlock(pInfo, pDelBlock, pInfo->pDeleteDataRes); pInfo->pDeleteDataRes->info.type = STREAM_DELETE_RESULT; @@ -2047,8 +2143,7 @@ FETCH_NEXT_BLOCK: return pInfo->pUpdateRes; } - SSDataBlock* pBlock = pInfo->pRes; - SDataBlockInfo* pBlockInfo = &pBlock->info; + SDataBlockInfo* pBlockInfo = &pInfo->pRes->info; int32_t totalBlocks = taosArrayGetSize(pInfo->pBlockLists); NEXT_SUBMIT_BLK: @@ -2072,21 +2167,23 @@ FETCH_NEXT_BLOCK: } } - blockDataCleanup(pBlock); + blockDataCleanup(pInfo->pRes); while (pAPI->tqReaderFn.tqNextBlockImpl(pInfo->tqReader, id)) { SSDataBlock* pRes = NULL; int32_t code = pAPI->tqReaderFn.tqRetrieveBlock(pInfo->tqReader, &pRes, id); - qDebug("retrieve data from submit completed code:%s, rows:%" PRId64 " %s", tstrerror(code), pRes->info.rows, - id); + qDebug("retrieve data from submit completed code:%s rows:%" PRId64 " %s", tstrerror(code), pRes->info.rows, id); if (code != TSDB_CODE_SUCCESS || pRes->info.rows == 0) { qDebug("retrieve data failed, try next block in submit block, %s", id); continue; } - setBlockIntoRes(pInfo, pRes, false); + setBlockIntoRes(pInfo, pRes, &pStreamInfo->fillHistoryWindow, false); + if (pInfo->pRes->info.rows == 0) { + continue; + } if (pInfo->pCreateTbRes->info.rows > 0) { pInfo->scanMode = STREAM_SCAN_FROM_RES; @@ -2094,42 +2191,12 @@ FETCH_NEXT_BLOCK: return pInfo->pCreateTbRes; } - doCheckUpdate(pInfo, pBlockInfo->window.ekey, pBlock); - doFilter(pBlock, pOperator->exprSupp.pFilterInfo, NULL); - - { // do additional time window filter - STimeWindow* pWindow = &pStreamInfo->fillHistoryWindow; - - if (pWindow->skey != INT64_MIN) { - qDebug("%s filter for additional history window, skey:%"PRId64, id, pWindow->skey); - - bool* p 
= taosMemoryCalloc(pBlock->info.rows, sizeof(bool)); - bool hasUnqualified = false; - - SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, pInfo->primaryTsIndex); - for(int32_t i = 0; i < pBlock->info.rows; ++i) { - int64_t* ts = (int64_t*) colDataGetData(pCol, i); - p[i] = (*ts >= pWindow->skey); - - if (!p[i]) { - hasUnqualified = true; - } - } - - if (hasUnqualified) { - trimDataBlock(pBlock, pBlock->info.rows, p); - } - - taosMemoryFree(p); - } - } - - pBlock->info.dataLoad = 1; - blockDataUpdateTsWindow(pBlock, pInfo->primaryTsIndex); + doCheckUpdate(pInfo, pBlockInfo->window.ekey, pInfo->pRes); + doFilter(pInfo->pRes, pOperator->exprSupp.pFilterInfo, NULL); - qDebug("%s %" PRId64 " rows in datablock, update res:%" PRId64, id, pBlockInfo->rows, - pInfo->pUpdateDataRes->info.rows); - if (pBlockInfo->rows > 0 || pInfo->pUpdateDataRes->info.rows > 0) { + int64_t numOfUpdateRes = pInfo->pUpdateDataRes->info.rows; + qDebug("%s %" PRId64 " rows in datablock, update res:%" PRId64, id, pBlockInfo->rows, numOfUpdateRes); + if (pBlockInfo->rows > 0 || numOfUpdateRes > 0) { break; } } @@ -2147,7 +2214,7 @@ FETCH_NEXT_BLOCK: qDebug("stream scan completed, and return source rows:%" PRId64", %s", pBlockInfo->rows, id); if (pBlockInfo->rows > 0) { - return pBlock; + return pInfo->pRes; } if (pInfo->pUpdateDataRes->info.rows > 0) { @@ -2214,7 +2281,7 @@ static SSDataBlock* doRawScan(SOperatorInfo* pOperator) { STqOffsetVal offset = {0}; if (mtInfo.uid == 0 || pInfo->sContext->withMeta == ONLY_META) { // read snapshot done, change to get data from wal qDebug("tmqsnap read snapshot done, change to get data from wal"); - tqOffsetResetToLog(&offset, pInfo->sContext->snapVersion); + tqOffsetResetToLog(&offset, pInfo->sContext->snapVersion + 1); } else { tqOffsetResetToData(&offset, mtInfo.uid, INT64_MIN); qDebug("tmqsnap change get data uid:%" PRId64 "", mtInfo.uid); @@ -2357,7 +2424,9 @@ void streamScanReloadState(SOperatorInfo* pOperator) { 
pInfo->stateStore.updateInfoDestroy(pInfo->pUpdateInfo); pInfo->pUpdateInfo = pUpInfo; } else { - pInfo->pUpdateInfo->minTS = TMAX(pInfo->pUpdateInfo->minTS, pUpInfo->minTS); + pInfo->stateStore.windowSBfDelete(pInfo->pUpdateInfo, 1); + pInfo->stateStore.windowSBfAdd(pInfo->pUpdateInfo, 1); + ASSERT(pInfo->pUpdateInfo->minTS > pUpInfo->minTS); pInfo->pUpdateInfo->maxDataVersion = TMAX(pInfo->pUpdateInfo->maxDataVersion, pUpInfo->maxDataVersion); SHashObj* curMap = pInfo->pUpdateInfo->pMap; void *pIte = taosHashIterate(curMap, NULL); @@ -2538,7 +2607,7 @@ SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhys pInfo->igCheckUpdate = pTableScanNode->igCheckUpdate; pInfo->igExpired = pTableScanNode->igExpired; pInfo->twAggSup.maxTs = INT64_MIN; - pInfo->pState = NULL; + pInfo->pState = pTaskInfo->streamInfo.pState; pInfo->stateStore = pTaskInfo->storageAPI.stateStore; pInfo->readerFn = pTaskInfo->storageAPI.tqReaderFn; @@ -2730,39 +2799,25 @@ _error: return NULL; } -static SSDataBlock* getTableDataBlockImpl(void* param) { +static SSDataBlock* getBlockForTableMergeScan(void* param) { STableMergeScanSortSourceParam* source = param; SOperatorInfo* pOperator = source->pOperator; STableMergeScanInfo* pInfo = pOperator->info; SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; SStorageAPI* pAPI = &pTaskInfo->storageAPI; - int32_t readIdx = source->readerIdx; - SSDataBlock* pBlock = source->inputBlock; + SSDataBlock* pBlock = pInfo->pReaderBlock; int32_t code = 0; - SQueryTableDataCond* pQueryCond = taosArrayGet(pInfo->queryConds, readIdx); - int64_t st = taosGetTimestampUs(); - void* p = tableListGetInfo(pInfo->base.pTableListInfo, readIdx + pInfo->tableStartIndex); - SReadHandle* pHandle = &pInfo->base.readHandle; - if (NULL == source->dataReader) { - code = pAPI->tsdReader.tsdReaderOpen(pHandle->vnode, pQueryCond, p, 1, pBlock, (void**)&source->dataReader, GET_TASKID(pTaskInfo), false, NULL); - if (code != 0) { - T_LONG_JMP(pTaskInfo->env, code); - } 
- } - - pInfo->base.dataReader = source->dataReader; - STsdbReader* reader = pInfo->base.dataReader; bool hasNext = false; - qTrace("tsdb/read-table-data: %p, enter next reader", reader); + STsdbReader* reader = pInfo->base.dataReader; while (true) { code = pAPI->tsdReader.tsdNextDataBlock(reader, &hasNext); if (code != 0) { pAPI->tsdReader.tsdReaderReleaseDataBlock(reader); - pInfo->base.dataReader = NULL; + qError("table merge scan fetch next data block error code: %d, %s", code, GET_TASKID(pTaskInfo)); T_LONG_JMP(pTaskInfo->env, code); } @@ -2771,9 +2826,9 @@ static SSDataBlock* getTableDataBlockImpl(void* param) { } if (isTaskKilled(pTaskInfo)) { + qInfo("table merge scan fetch next data block found task killed. %s", GET_TASKID(pTaskInfo)); pAPI->tsdReader.tsdReaderReleaseDataBlock(reader); - pInfo->base.dataReader = NULL; - T_LONG_JMP(pTaskInfo->env, pTaskInfo->code); + break; } // process this data block based on the probabilities @@ -2782,16 +2837,11 @@ static SSDataBlock* getTableDataBlockImpl(void* param) { continue; } - if (pQueryCond->order == TSDB_ORDER_ASC) { - pQueryCond->twindows.skey = pBlock->info.window.ekey + 1; - } else { - pQueryCond->twindows.ekey = pBlock->info.window.skey - 1; - } - uint32_t status = 0; code = loadDataBlock(pOperator, &pInfo->base, pBlock, &status); // code = loadDataBlockFromOneTable(pOperator, pTableScanInfo, pBlock, &status); if (code != TSDB_CODE_SUCCESS) { + qInfo("table merge scan load datablock code %d, %s", code, GET_TASKID(pTaskInfo)); T_LONG_JMP(pTaskInfo->env, code); } @@ -2809,16 +2859,9 @@ static SSDataBlock* getTableDataBlockImpl(void* param) { pOperator->resultInfo.totalRows += pBlock->info.rows; pInfo->base.readRecorder.elapsedTime += (taosGetTimestampUs() - st) / 1000.0; - qTrace("tsdb/read-table-data: %p, close reader", reader); - pInfo->base.dataReader = NULL; return pBlock; } - pAPI->tsdReader.tsdReaderClose(source->dataReader); - source->dataReader = NULL; - pInfo->base.dataReader = NULL; - 
blockDataDestroy(source->inputBlock); - source->inputBlock = NULL; return NULL; } @@ -2854,6 +2897,8 @@ int32_t dumpQueryTableCond(const SQueryTableDataCond* src, SQueryTableDataCond* int32_t startGroupTableMergeScan(SOperatorInfo* pOperator) { STableMergeScanInfo* pInfo = pOperator->info; SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SReadHandle* pHandle = &pInfo->base.readHandle; + SStorageAPI* pAPI = &pTaskInfo->storageAPI; { size_t numOfTables = tableListGetSize(pInfo->base.pTableListInfo); @@ -2870,53 +2915,38 @@ int32_t startGroupTableMergeScan(SOperatorInfo* pOperator) { int32_t tableStartIdx = pInfo->tableStartIndex; int32_t tableEndIdx = pInfo->tableEndIndex; - pInfo->base.dataReader = NULL; - - // todo the total available buffer should be determined by total capacity of buffer of this task. - // the additional one is reserved for merge result - // pInfo->sortBufSize = pInfo->bufPageSize * (tableEndIdx - tableStartIdx + 1 + 1); - int32_t kWay = (TSDB_MAX_BYTES_PER_ROW * 2) / (pInfo->pResBlock->info.rowSize); - if (kWay >= 128) { - kWay = 128; - } else if (kWay <= 2) { - kWay = 2; + bool hasLimit = pInfo->limitInfo.limit.limit != -1 || pInfo->limitInfo.limit.offset != -1; + int64_t mergeLimit = -1; + if (hasLimit) { + mergeLimit = pInfo->limitInfo.limit.limit + pInfo->limitInfo.limit.offset; + } + size_t szRow = blockDataGetRowSize(pInfo->pResBlock); + if (hasLimit) { + pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_SINGLESOURCE_SORT, -1, -1, + NULL, pTaskInfo->id.str, mergeLimit, szRow+8, tsPQSortMemThreshold * 1024* 1024); } else { - int i = 2; - while (i * 2 <= kWay) i = i * 2; - kWay = i; + pInfo->sortBufSize = 2048 * pInfo->bufPageSize; + int32_t numOfBufPage = pInfo->sortBufSize / pInfo->bufPageSize; + pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_BLOCK_TS_MERGE, pInfo->bufPageSize, numOfBufPage, + pInfo->pSortInputBlock, pTaskInfo->id.str, 0, 0, 0); + + tsortSetMergeLimit(pInfo->pSortHandle, mergeLimit); } 
- pInfo->sortBufSize = pInfo->bufPageSize * (kWay + 1); - int32_t numOfBufPage = pInfo->sortBufSize / pInfo->bufPageSize; - pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_MULTISOURCE_MERGE, pInfo->bufPageSize, numOfBufPage, - pInfo->pSortInputBlock, pTaskInfo->id.str, 0, 0, 0); - - tsortSetFetchRawDataFp(pInfo->pSortHandle, getTableDataBlockImpl, NULL, NULL); + tsortSetFetchRawDataFp(pInfo->pSortHandle, getBlockForTableMergeScan, NULL, NULL); // one table has one data block int32_t numOfTable = tableEndIdx - tableStartIdx + 1; - pInfo->queryConds = taosArrayInit(numOfTable, sizeof(SQueryTableDataCond)); - for (int32_t i = 0; i < numOfTable; ++i) { - STableMergeScanSortSourceParam param = {0}; - param.readerIdx = i; - param.pOperator = pOperator; - param.inputBlock = createOneDataBlock(pInfo->pResBlock, false); + STableMergeScanSortSourceParam param = {0}; + param.pOperator = pOperator; + STableKeyInfo* startKeyInfo = tableListGetInfo(pInfo->base.pTableListInfo, tableStartIdx); + pAPI->tsdReader.tsdReaderOpen(pHandle->vnode, &pInfo->base.cond, startKeyInfo, numOfTable, pInfo->pReaderBlock, (void**)&pInfo->base.dataReader, GET_TASKID(pTaskInfo), false, NULL); - taosArrayPush(pInfo->sortSourceParams, ¶m); - - SQueryTableDataCond cond; - dumpQueryTableCond(&pInfo->base.cond, &cond); - taosArrayPush(pInfo->queryConds, &cond); - } - - for (int32_t i = 0; i < numOfTable; ++i) { - SSortSource* ps = taosMemoryCalloc(1, sizeof(SSortSource)); - STableMergeScanSortSourceParam* param = taosArrayGet(pInfo->sortSourceParams, i); - ps->param = param; - ps->onlyRef = true; - tsortAddSource(pInfo->pSortHandle, ps); - } + SSortSource* ps = taosMemoryCalloc(1, sizeof(SSortSource)); + ps->param = ¶m; + ps->onlyRef = true; + tsortAddSource(pInfo->pSortHandle, ps); int32_t code = tsortOpen(pInfo->pSortHandle); @@ -2932,8 +2962,6 @@ int32_t stopGroupTableMergeScan(SOperatorInfo* pOperator) { SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; SStorageAPI* pAPI = 
&pTaskInfo->storageAPI; - int32_t numOfTable = taosArrayGetSize(pInfo->queryConds); - SSortExecInfo sortExecInfo = tsortGetSortExecInfo(pInfo->pSortHandle); pInfo->sortExecInfo.sortMethod = sortExecInfo.sortMethod; pInfo->sortExecInfo.sortBuffer = sortExecInfo.sortBuffer; @@ -2941,24 +2969,14 @@ int32_t stopGroupTableMergeScan(SOperatorInfo* pOperator) { pInfo->sortExecInfo.readBytes += sortExecInfo.readBytes; pInfo->sortExecInfo.writeBytes += sortExecInfo.writeBytes; - for (int32_t i = 0; i < numOfTable; ++i) { - STableMergeScanSortSourceParam* param = taosArrayGet(pInfo->sortSourceParams, i); - blockDataDestroy(param->inputBlock); - pAPI->tsdReader.tsdReaderClose(param->dataReader); - param->dataReader = NULL; + if (pInfo->base.dataReader != NULL) { + pAPI->tsdReader.tsdReaderClose(pInfo->base.dataReader); + pInfo->base.dataReader = NULL; } - taosArrayClear(pInfo->sortSourceParams); tsortDestroySortHandle(pInfo->pSortHandle); pInfo->pSortHandle = NULL; - for (int32_t i = 0; i < taosArrayGetSize(pInfo->queryConds); i++) { - SQueryTableDataCond* cond = taosArrayGet(pInfo->queryConds, i); - taosMemoryFree(cond->colList); - } - taosArrayDestroy(pInfo->queryConds); - pInfo->queryConds = NULL; - resetLimitInfoForNextGroup(&pInfo->limitInfo); return TSDB_CODE_SUCCESS; } @@ -2971,28 +2989,32 @@ SSDataBlock* getSortedTableMergeScanBlockData(SSortHandle* pHandle, SSDataBlock* SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; blockDataCleanup(pResBlock); - + STupleHandle* pTupleHandle = NULL; while (1) { - STupleHandle* pTupleHandle = tsortNextTuple(pHandle); - if (pTupleHandle == NULL) { - break; + while (1) { + pTupleHandle = tsortNextTuple(pHandle); + if (pTupleHandle == NULL) { + break; + } + + appendOneRowToDataBlock(pResBlock, pTupleHandle); + if (pResBlock->info.rows >= capacity) { + break; + } } - appendOneRowToDataBlock(pResBlock, pTupleHandle); - if (pResBlock->info.rows >= capacity) { - break; + if (tsortIsClosed(pHandle)) { + terrno = 
TSDB_CODE_TSC_QUERY_CANCELLED; + T_LONG_JMP(pOperator->pTaskInfo->env, terrno); } - } - if (tsortIsClosed(pHandle)) { - terrno = TSDB_CODE_TSC_QUERY_CANCELLED; - T_LONG_JMP(pOperator->pTaskInfo->env, terrno); + bool limitReached = applyLimitOffset(&pInfo->limitInfo, pResBlock, pTaskInfo); + qDebug("%s get sorted row block, rows:%" PRId64 ", limit:%" PRId64, GET_TASKID(pTaskInfo), pResBlock->info.rows, + pInfo->limitInfo.numOfOutputRows); + if (pTupleHandle == NULL || limitReached || pResBlock->info.rows > 0) { + break; + } } - - bool limitReached = applyLimitOffset(&pInfo->limitInfo, pResBlock, pTaskInfo); - qDebug("%s get sorted row block, rows:%" PRId64 ", limit:%" PRId64, GET_TASKID(pTaskInfo), pResBlock->info.rows, - pInfo->limitInfo.numOfOutputRows); - return (pResBlock->info.rows > 0) ? pResBlock : NULL; } @@ -3056,14 +3078,7 @@ void destroyTableMergeScanOperatorInfo(void* param) { STableMergeScanInfo* pTableScanInfo = (STableMergeScanInfo*)param; cleanupQueryTableDataCond(&pTableScanInfo->base.cond); - int32_t numOfTable = taosArrayGetSize(pTableScanInfo->queryConds); - - for (int32_t i = 0; i < numOfTable; i++) { - STableMergeScanSortSourceParam* p = taosArrayGet(pTableScanInfo->sortSourceParams, i); - blockDataDestroy(p->inputBlock); - pTableScanInfo->base.readerAPI.tsdReaderClose(p->dataReader); - p->dataReader = NULL; - } + int32_t numOfTable = taosArrayGetSize(pTableScanInfo->sortSourceParams); pTableScanInfo->base.readerAPI.tsdReaderClose(pTableScanInfo->base.dataReader); pTableScanInfo->base.dataReader = NULL; @@ -3072,16 +3087,11 @@ void destroyTableMergeScanOperatorInfo(void* param) { tsortDestroySortHandle(pTableScanInfo->pSortHandle); pTableScanInfo->pSortHandle = NULL; - for (int i = 0; i < taosArrayGetSize(pTableScanInfo->queryConds); i++) { - SQueryTableDataCond* pCond = taosArrayGet(pTableScanInfo->queryConds, i); - taosMemoryFree(pCond->colList); - } - - taosArrayDestroy(pTableScanInfo->queryConds); destroyTableScanBase(&pTableScanInfo->base, 
&pTableScanInfo->base.readerAPI); pTableScanInfo->pResBlock = blockDataDestroy(pTableScanInfo->pResBlock); pTableScanInfo->pSortInputBlock = blockDataDestroy(pTableScanInfo->pSortInputBlock); + pTableScanInfo->pReaderBlock = blockDataDestroy(pTableScanInfo->pReaderBlock); taosArrayDestroy(pTableScanInfo->pSortInfo); taosMemoryFreeClear(param); @@ -3143,6 +3153,8 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN pInfo->base.scanFlag = MAIN_SCAN; pInfo->base.readHandle = *readHandle; + pInfo->readIdx = -1; + pInfo->base.limitInfo.limit.limit = -1; pInfo->base.limitInfo.slimit.limit = -1; pInfo->base.pTableListInfo = pTableListInfo; @@ -3165,6 +3177,8 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN pInfo->pSortInputBlock = createOneDataBlock(pInfo->pResBlock, false); initLimitInfo(pTableScanNode->scan.node.pLimit, pTableScanNode->scan.node.pSlimit, &pInfo->limitInfo); + pInfo->pReaderBlock = createOneDataBlock(pInfo->pResBlock, false); + int32_t rowSize = pInfo->pResBlock->info.rowSize; uint32_t nCols = taosArrayGetSize(pInfo->pResBlock->pDataBlock); pInfo->bufPageSize = getProperSortPageSize(rowSize, nCols); @@ -3573,6 +3587,4 @@ static void destoryTableCountScanOperator(void* param) { taosArrayDestroy(pTableCountScanInfo->stbUidList); taosMemoryFreeClear(param); -} - -// clang-format on +} \ No newline at end of file diff --git a/source/libs/executor/src/sortoperator.c b/source/libs/executor/src/sortoperator.c index 9c70a95389784fd6520c8d7ce5730a371904a0a8..459474d06e79ea0c8407509a765d887c2cd3d469 100644 --- a/source/libs/executor/src/sortoperator.c +++ b/source/libs/executor/src/sortoperator.c @@ -54,19 +54,19 @@ SOperatorInfo* createSortOperatorInfo(SOperatorInfo* downstream, SSortPhysiNode* int32_t numOfCols = 0; pOperator->exprSupp.pExprInfo = createExprInfo(pSortNode->pExprs, NULL, &numOfCols); pOperator->exprSupp.numOfExprs = numOfCols; - calcSortOperMaxTupleLength(pInfo, 
pSortNode->pSortKeys); - pInfo->maxRows = -1; - if (pSortNode->node.pLimit) { - SLimitNode* pLimit = (SLimitNode*)pSortNode->node.pLimit; - if (pLimit->limit > 0) pInfo->maxRows = pLimit->limit; - } - int32_t numOfOutputCols = 0; int32_t code = extractColMatchInfo(pSortNode->pTargets, pDescNode, &numOfOutputCols, COL_MATCH_FROM_SLOT_ID, &pInfo->matchInfo); if (code != TSDB_CODE_SUCCESS) { goto _error; } + + calcSortOperMaxTupleLength(pInfo, pSortNode->pSortKeys); + pInfo->maxRows = -1; + if (pSortNode->node.pLimit) { + SLimitNode* pLimit = (SLimitNode*)pSortNode->node.pLimit; + if (pLimit->limit > 0) pInfo->maxRows = pLimit->limit + pLimit->offset; + } pOperator->exprSupp.pCtx = createSqlFunctionCtx(pOperator->exprSupp.pExprInfo, numOfCols, &pOperator->exprSupp.rowEntryInfoOffset, &pTaskInfo->storageAPI.functionStore); diff --git a/source/libs/executor/src/tfill.c b/source/libs/executor/src/tfill.c index 55ef019d767da89c18a7caf8db31737c68bf99af..4e0dff9d4f3474c2b2335d79b0cde362f566dfa1 100644 --- a/source/libs/executor/src/tfill.c +++ b/source/libs/executor/src/tfill.c @@ -66,20 +66,25 @@ static void setNullRow(SSDataBlock* pBlock, SFillInfo* pFillInfo, int32_t rowInd } static void doSetUserSpecifiedValue(SColumnInfoData* pDst, SVariant* pVar, int32_t rowIndex, int64_t currentKey) { + bool isNull = (TSDB_DATA_TYPE_NULL == pVar->nType) ? 
true : false; if (pDst->info.type == TSDB_DATA_TYPE_FLOAT) { float v = 0; - GET_TYPED_DATA(v, float, pVar->nType, &pVar->i); - colDataSetVal(pDst, rowIndex, (char*)&v, false); + GET_TYPED_DATA(v, float, pVar->nType, &pVar->f); + colDataSetVal(pDst, rowIndex, (char*)&v, isNull); } else if (pDst->info.type == TSDB_DATA_TYPE_DOUBLE) { double v = 0; - GET_TYPED_DATA(v, double, pVar->nType, &pVar->i); - colDataSetVal(pDst, rowIndex, (char*)&v, false); + GET_TYPED_DATA(v, double, pVar->nType, &pVar->d); + colDataSetVal(pDst, rowIndex, (char*)&v, isNull); } else if (IS_SIGNED_NUMERIC_TYPE(pDst->info.type)) { int64_t v = 0; GET_TYPED_DATA(v, int64_t, pVar->nType, &pVar->i); - colDataSetVal(pDst, rowIndex, (char*)&v, false); + colDataSetVal(pDst, rowIndex, (char*)&v, isNull); + } else if (IS_UNSIGNED_NUMERIC_TYPE(pDst->info.type)) { + uint64_t v = 0; + GET_TYPED_DATA(v, uint64_t, pVar->nType, &pVar->u); + colDataSetVal(pDst, rowIndex, (char*)&v, isNull); } else if (pDst->info.type == TSDB_DATA_TYPE_TIMESTAMP) { - colDataSetVal(pDst, rowIndex, (const char*)¤tKey, false); + colDataSetVal(pDst, rowIndex, (const char*)¤tKey, isNull); } else { // varchar/nchar data colDataSetNULL(pDst, rowIndex); } diff --git a/source/libs/executor/src/timesliceoperator.c b/source/libs/executor/src/timesliceoperator.c index cb74392a10953331d76a8d6af46f91296812ea13..b01998564513d20d17267a64776d0d20f2461c96 100644 --- a/source/libs/executor/src/timesliceoperator.c +++ b/source/libs/executor/src/timesliceoperator.c @@ -312,6 +312,7 @@ static bool genInterpolationResult(STimeSliceOperatorInfo* pSliceInfo, SExprSupp case TSDB_FILL_SET_VALUE_F: { SVariant* pVar = &pSliceInfo->pFillColInfo[fillColIndex].fillVal; + bool isNull = (TSDB_DATA_TYPE_NULL == pVar->nType) ? 
true : false; if (pDst->info.type == TSDB_DATA_TYPE_FLOAT) { float v = 0; if (!IS_VAR_DATA_TYPE(pVar->nType)) { @@ -319,7 +320,7 @@ static bool genInterpolationResult(STimeSliceOperatorInfo* pSliceInfo, SExprSupp } else { v = taosStr2Float(varDataVal(pVar->pz), NULL); } - colDataSetVal(pDst, rows, (char*)&v, false); + colDataSetVal(pDst, rows, (char*)&v, isNull); } else if (pDst->info.type == TSDB_DATA_TYPE_DOUBLE) { double v = 0; if (!IS_VAR_DATA_TYPE(pVar->nType)) { @@ -327,7 +328,7 @@ static bool genInterpolationResult(STimeSliceOperatorInfo* pSliceInfo, SExprSupp } else { v = taosStr2Double(varDataVal(pVar->pz), NULL); } - colDataSetVal(pDst, rows, (char*)&v, false); + colDataSetVal(pDst, rows, (char*)&v, isNull); } else if (IS_SIGNED_NUMERIC_TYPE(pDst->info.type)) { int64_t v = 0; if (!IS_VAR_DATA_TYPE(pVar->nType)) { @@ -335,7 +336,7 @@ static bool genInterpolationResult(STimeSliceOperatorInfo* pSliceInfo, SExprSupp } else { v = taosStr2Int64(varDataVal(pVar->pz), NULL, 10); } - colDataSetVal(pDst, rows, (char*)&v, false); + colDataSetVal(pDst, rows, (char*)&v, isNull); } else if (IS_UNSIGNED_NUMERIC_TYPE(pDst->info.type)) { uint64_t v = 0; if (!IS_VAR_DATA_TYPE(pVar->nType)) { @@ -343,7 +344,7 @@ static bool genInterpolationResult(STimeSliceOperatorInfo* pSliceInfo, SExprSupp } else { v = taosStr2UInt64(varDataVal(pVar->pz), NULL, 10); } - colDataSetVal(pDst, rows, (char*)&v, false); + colDataSetVal(pDst, rows, (char*)&v, isNull); } else if (IS_BOOLEAN_TYPE(pDst->info.type)) { bool v = false; if (!IS_VAR_DATA_TYPE(pVar->nType)) { @@ -351,7 +352,7 @@ static bool genInterpolationResult(STimeSliceOperatorInfo* pSliceInfo, SExprSupp } else { v = taosStr2Int8(varDataVal(pVar->pz), NULL, 10); } - colDataSetVal(pDst, rows, (char*)&v, false); + colDataSetVal(pDst, rows, (char*)&v, isNull); } ++fillColIndex; diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index 
51da73d4d7b12d1eb84cded9c9921706cbc1f302..3a5ff91f68b2ee8a047a0dae481d149be9ba13f2 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -2753,6 +2753,7 @@ void streamIntervalReloadState(SOperatorInfo* pOperator) { strlen(STREAM_INTERVAL_OP_STATE_NAME), &pBuf, &size); TSKEY ts = *(TSKEY*)pBuf; taosMemoryFree(pBuf); + pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, ts); pInfo->statestore.streamStateReloadInfo(pInfo->pState, ts); } SOperatorInfo* downstream = pOperator->pDownstream[0]; @@ -2914,6 +2915,7 @@ void destroyStreamSessionAggOperatorInfo(void* param) { blockDataDestroy(pInfo->pDelRes); blockDataDestroy(pInfo->pWinBlock); blockDataDestroy(pInfo->pUpdateRes); + tSimpleHashCleanup(pInfo->pStUpdated); tSimpleHashCleanup(pInfo->pStDeleted); taosArrayDestroy(pInfo->historyWins); @@ -3008,14 +3010,6 @@ int32_t initStreamAggSupporter(SStreamAggSupporter* pSup, SqlFunctionCtx* pCtx, pCtx[i].saveHandle.pBuf = pSup->pResultBuf; } - if (pHandle) { - pSup->winRange = pHandle->winRange; - // temporary - if (pSup->winRange.ekey <= 0) { - pSup->winRange.ekey = INT64_MAX; - } - } - pSup->pSessionAPI = pApi; return TSDB_CODE_SUCCESS; @@ -3063,11 +3057,12 @@ void setSessionOutputBuf(SStreamAggSupporter* pAggSup, TSKEY startTs, TSKEY endT if (code == TSDB_CODE_SUCCESS && !inWinRange(&pAggSup->winRange, &pCurWin->sessionWin.win)) { code = TSDB_CODE_FAILED; releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)pCurWin->pOutputBuf, &pAggSup->pSessionAPI->stateStore); - pCurWin->pOutputBuf = taosMemoryMalloc(size); + pCurWin->pOutputBuf = taosMemoryCalloc(1, size); } if (code == TSDB_CODE_SUCCESS) { pCurWin->isOutput = true; + pAggSup->stateStore.streamStateSessionDel(pAggSup->pState, &pCurWin->sessionWin); } else { pCurWin->sessionWin.win.skey = startTs; pCurWin->sessionWin.win.ekey = endTs; @@ -3197,11 +3192,12 @@ SStreamStateCur* getNextSessionWinInfo(SStreamAggSupporter* pAggSup, SSHashObj* return pCur; } -static 
void compactSessionWindow(SOperatorInfo* pOperator, SResultWindowInfo* pCurWin, SSHashObj* pStUpdated, - SSHashObj* pStDeleted) { - SExprSupp* pSup = &pOperator->exprSupp; - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI; +static int32_t compactSessionWindow(SOperatorInfo* pOperator, SResultWindowInfo* pCurWin, SSHashObj* pStUpdated, + SSHashObj* pStDeleted, bool addGap) { + SExprSupp* pSup = &pOperator->exprSupp; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI; + int32_t winNum = 0; SStreamSessionAggOperatorInfo* pInfo = pOperator->info; SResultRow* pCurResult = NULL; @@ -3222,7 +3218,7 @@ static void compactSessionWindow(SOperatorInfo* pOperator, SResultWindowInfo* pC initSessionOutputBuf(&winInfo, &pWinResult, pAggSup->pDummyCtx, numOfOutput, pSup->rowEntryInfoOffset); pCurWin->sessionWin.win.ekey = TMAX(pCurWin->sessionWin.win.ekey, winInfo.sessionWin.win.ekey); int64_t winDelta = 0; - if (IS_FINAL_OP(pInfo)) { + if (addGap) { winDelta = pAggSup->gap; } updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pCurWin->sessionWin.win, winDelta); @@ -3235,11 +3231,14 @@ static void compactSessionWindow(SOperatorInfo* pOperator, SResultWindowInfo* pC doDeleteSessionWindow(pAggSup, &winInfo.sessionWin); pAPI->stateStore.streamStateFreeCur(pCur); taosMemoryFree(winInfo.pOutputBuf); + winNum++; } + return winNum; } int32_t saveSessionOutputBuf(SStreamAggSupporter* pAggSup, SResultWindowInfo* pWinInfo) { saveSessionDiscBuf(pAggSup->pState, &pWinInfo->sessionWin, pWinInfo->pOutputBuf, pAggSup->resultRowSize, &pAggSup->stateStore); + pWinInfo->pOutputBuf = NULL; return TSDB_CODE_SUCCESS; } @@ -3253,8 +3252,13 @@ static void doStreamSessionAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSData SResultRow* pResult = NULL; int32_t rows = pSDataBlock->info.rows; int32_t winRows = 0; + SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; pInfo->dataVersion = 
TMAX(pInfo->dataVersion, pSDataBlock->info.version); + pAggSup->winRange = pTaskInfo->streamInfo.fillHistoryWindow; + if (pAggSup->winRange.ekey <= 0) { + pAggSup->winRange.ekey = INT64_MAX; + } SColumnInfoData* pStartTsCol = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex); TSKEY* startTsCols = (int64_t*)pStartTsCol->pData; @@ -3266,7 +3270,6 @@ static void doStreamSessionAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSData } TSKEY* endTsCols = (int64_t*)pEndTsCol->pData; - SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; for (int32_t i = 0; i < rows;) { if (pInfo->ignoreExpiredData && isOverdue(endTsCols[i], &pInfo->twAggSup)) { i++; @@ -3291,7 +3294,7 @@ static void doStreamSessionAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSData if (code != TSDB_CODE_SUCCESS || pResult == NULL) { T_LONG_JMP(pTaskInfo->env, TSDB_CODE_OUT_OF_MEMORY); } - compactSessionWindow(pOperator, &winInfo, pStUpdated, pStDeleted); + compactSessionWindow(pOperator, &winInfo, pStUpdated, pStDeleted, addGap); saveSessionOutputBuf(pAggSup, &winInfo); if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE && pStUpdated) { @@ -3455,7 +3458,7 @@ static void rebuildSessionWindow(SOperatorInfo* pOperator, SArray* pWinArray, SS initSessionOutputBuf(&childWin, &pChResult, pChild->exprSupp.pCtx, numOfOutput, pChild->exprSupp.rowEntryInfoOffset); compactFunctions(pSup->pCtx, pChild->exprSupp.pCtx, numOfOutput, pTaskInfo, &pInfo->twAggSup.timeWindowData); - compactSessionWindow(pOperator, &parentWin, pStUpdated, NULL); + compactSessionWindow(pOperator, &parentWin, pStUpdated, NULL, true); saveResult(parentWin, pStUpdated); } else { break; @@ -3707,8 +3710,8 @@ void streamSessionReleaseState(SOperatorInfo* pOperator) { } void resetWinRange(STimeWindow* winRange) { - winRange->skey = INT16_MIN; - winRange->skey = INT16_MAX; + winRange->skey = INT64_MIN; + winRange->ekey = INT64_MAX; } void streamSessionReloadState(SOperatorInfo* pOperator) { @@ -3724,10 +3727,27 @@ void 
streamSessionReloadState(SOperatorInfo* pOperator) { int32_t num = size / sizeof(SSessionKey); SSessionKey* pSeKeyBuf = (SSessionKey*) pBuf; ASSERT(size == num * sizeof(SSessionKey)); + if (!pInfo->pStUpdated && num > 0) { + _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); + pInfo->pStUpdated = tSimpleHashInit(64, hashFn); + } for (int32_t i = 0; i < num; i++) { SResultWindowInfo winInfo = {0}; setSessionOutputBuf(pAggSup, pSeKeyBuf[i].win.skey, pSeKeyBuf[i].win.ekey, pSeKeyBuf[i].groupId, &winInfo); - compactSessionWindow(pOperator, &winInfo, pInfo->pStUpdated, pInfo->pStDeleted); + int32_t winNum = compactSessionWindow(pOperator, &winInfo, pInfo->pStUpdated, pInfo->pStDeleted, true); + if (winNum > 0) { + if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE) { + saveResult(winInfo, pInfo->pStUpdated); + } else if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_WINDOW_CLOSE) { + if (!isCloseWindow(&winInfo.sessionWin.win, &pInfo->twAggSup)) { + saveDeleteRes(pInfo->pStDeleted, winInfo.sessionWin); + } + SSessionKey key = {0}; + getSessionHashKey(&winInfo.sessionWin, &key); + tSimpleHashPut(pAggSup->pResultRows, &key, sizeof(SSessionKey), &winInfo, sizeof(SResultWindowInfo)); + } + } + saveSessionOutputBuf(pAggSup, &winInfo); } taosMemoryFree(pBuf); @@ -4019,6 +4039,7 @@ void destroyStreamStateOperatorInfo(void* param) { colDataDestroy(&pInfo->twAggSup.timeWindowData); blockDataDestroy(pInfo->pDelRes); taosArrayDestroy(pInfo->historyWins); + tSimpleHashCleanup(pInfo->pSeUpdated); tSimpleHashCleanup(pInfo->pSeDeleted); taosMemoryFreeClear(param); } @@ -4036,13 +4057,20 @@ bool isEqualStateKey(SStateWindowInfo* pWin, char* pKeyData) { bool compareStateKey(void* data, void* key) { if (!data || !key) { - return false; + return true; } SStateKeys* stateKey = (SStateKeys*)key; stateKey->pData = (char*)key + sizeof(SStateKeys); return compareVal(data, stateKey); } +bool compareWinStateKey(SStateKeys* left, SStateKeys* right) { + if (!left || 
!right) { + return false; + } + return compareVal(left->pData, right); +} + void setStateOutputBuf(SStreamAggSupporter* pAggSup, TSKEY ts, uint64_t groupId, char* pKeyData, SStateWindowInfo* pCurWin, SStateWindowInfo* pNextWin) { int32_t size = pAggSup->resultRowSize; @@ -4065,14 +4093,19 @@ void setStateOutputBuf(SStreamAggSupporter* pAggSup, TSKEY ts, uint64_t groupId, pCurWin->winInfo.pOutputBuf = taosMemoryCalloc(1, size); pCurWin->pStateKey = (SStateKeys*)((char*)pCurWin->winInfo.pOutputBuf + (pAggSup->resultRowSize - pAggSup->stateKeySize)); - pCurWin->pStateKey->bytes = pAggSup->stateKeySize - sizeof(SStateKeys); - pCurWin->pStateKey->type = pAggSup->stateKeyType; - pCurWin->pStateKey->pData = (char*)pCurWin->pStateKey + sizeof(SStateKeys); - pCurWin->pStateKey->isNull = false; + pCurWin->pStateKey->bytes = pAggSup->stateKeySize - sizeof(SStateKeys); + pCurWin->pStateKey->type = pAggSup->stateKeyType; + pCurWin->pStateKey->pData = (char*)pCurWin->pStateKey + sizeof(SStateKeys); + pCurWin->pStateKey->isNull = false; + pCurWin->winInfo.sessionWin.groupId = groupId; + pCurWin->winInfo.sessionWin.win.skey = ts; + pCurWin->winInfo.sessionWin.win.ekey = ts; + qDebug("===stream===reset state win key. 
skey:%" PRId64 ", endkey:%" PRId64, pCurWin->winInfo.sessionWin.win.skey, pCurWin->winInfo.sessionWin.win.ekey); } if (code == TSDB_CODE_SUCCESS) { pCurWin->winInfo.isOutput = true; + pAggSup->stateStore.streamStateSessionDel(pAggSup->pState, &pCurWin->winInfo.sessionWin); } else if (pKeyData) { if (IS_VAR_DATA_TYPE(pAggSup->stateKeyType)) { varDataCopy(pCurWin->pStateKey->pData, pKeyData); @@ -4145,8 +4178,13 @@ static void doStreamStateAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDataBl TSKEY* tsCols = NULL; SResultRow* pResult = NULL; int32_t winRows = 0; + SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; pInfo->dataVersion = TMAX(pInfo->dataVersion, pSDataBlock->info.version); + pAggSup->winRange = pTaskInfo->streamInfo.fillHistoryWindow; + if (pAggSup->winRange.ekey <= 0) { + pAggSup->winRange.ekey = INT64_MAX; + } if (pSDataBlock->pDataBlock != NULL) { SColumnInfoData* pColDataInfo = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex); @@ -4155,7 +4193,6 @@ static void doStreamStateAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDataBl return; } - SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; int32_t rows = pSDataBlock->info.rows; blockDataEnsureCapacity(pAggSup->pScanBlock, rows); SColumnInfoData* pKeyColInfo = taosArrayGet(pSDataBlock->pDataBlock, pInfo->stateCol.slotId); @@ -4215,6 +4252,7 @@ static SSDataBlock* doStreamStateAgg(SOperatorInfo* pOperator) { SExprSupp* pSup = &pOperator->exprSupp; SStreamStateAggOperatorInfo* pInfo = pOperator->info; SOptrBasicInfo* pBInfo = &pInfo->binfo; + qDebug("===stream=== stream state agg"); if (pOperator->status == OP_RES_TO_RETURN) { doBuildDeleteDataBlock(pOperator, pInfo->pSeDeleted, pInfo->pDelRes, &pInfo->pDelIterator); if (pInfo->pDelRes->info.rows > 0) { @@ -4314,6 +4352,7 @@ static SSDataBlock* doStreamStateAgg(SOperatorInfo* pOperator) { void streamStateReleaseState(SOperatorInfo* pOperator) { SStreamStateAggOperatorInfo* pInfo = pOperator->info; int32_t resSize = 
taosArrayGetSize(pInfo->historyWins) * sizeof(SSessionKey); + qDebug("===stream=== relase state. save result count:%d", (int32_t)taosArrayGetSize(pInfo->historyWins)); pInfo->streamAggSup.stateStore.streamStateSaveInfo(pInfo->streamAggSup.pState, STREAM_STATE_OP_STATE_NAME, strlen(STREAM_STATE_OP_STATE_NAME), pInfo->historyWins->pData, resSize); SOperatorInfo* downstream = pOperator->pDownstream[0]; if (downstream->fpSet.releaseStreamStateFn) { @@ -4335,20 +4374,20 @@ static void compactStateWindow(SOperatorInfo* pOperator, SResultWindowInfo* pCur SResultRow* pWinResult = NULL; initSessionOutputBuf(pNextWin, &pWinResult, pAggSup->pDummyCtx, numOfOutput, pSup->rowEntryInfoOffset); - updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pCurWin->sessionWin.win, true); + updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pCurWin->sessionWin.win, 1); compactFunctions(pSup->pCtx, pAggSup->pDummyCtx, numOfOutput, pTaskInfo, &pInfo->twAggSup.timeWindowData); tSimpleHashRemove(pStUpdated, &pNextWin->sessionWin, sizeof(SSessionKey)); if (pNextWin->isOutput && pStDeleted) { + qDebug("===stream=== save delete window info %" PRId64 ", %" PRIu64, pNextWin->sessionWin.win.skey, pNextWin->sessionWin.groupId); saveDeleteRes(pStDeleted, pNextWin->sessionWin); } removeSessionResult(pStUpdated, pAggSup->pResultRows, pNextWin->sessionWin); doDeleteSessionWindow(pAggSup, &pNextWin->sessionWin); taosMemoryFree(pNextWin->pOutputBuf); - saveSessionOutputBuf(pAggSup, pCurWin); } void streamStateReloadState(SOperatorInfo* pOperator) { - SStreamSessionAggOperatorInfo* pInfo = pOperator->info; + SStreamStateAggOperatorInfo* pInfo = pOperator->info; SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; resetWinRange(&pAggSup->winRange); @@ -4358,24 +4397,44 @@ void streamStateReloadState(SOperatorInfo* pOperator) { int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_STATE_OP_STATE_NAME, strlen(STREAM_STATE_OP_STATE_NAME), &pBuf, &size); int32_t num = size / 
sizeof(SSessionKey); + qDebug("===stream=== reload state. get result count:%d", num); SSessionKey* pSeKeyBuf = (SSessionKey*) pBuf; ASSERT(size == num * sizeof(SSessionKey)); + if (!pInfo->pSeUpdated && num > 0) { + _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); + pInfo->pSeUpdated = tSimpleHashInit(64, hashFn); + } + if (!pInfo->pSeDeleted && num > 0) { + _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); + pInfo->pSeDeleted = tSimpleHashInit(64, hashFn); + } for (int32_t i = 0; i < num; i++) { SStateWindowInfo curInfo = {0}; SStateWindowInfo nextInfo = {0}; SStateWindowInfo dummy = {0}; + qDebug("===stream=== reload state. try process result %" PRId64 ", %" PRIu64 ", index:%d", pSeKeyBuf[i].win.skey, pSeKeyBuf[i].groupId, i); setStateOutputBuf(pAggSup, pSeKeyBuf[i].win.skey, pSeKeyBuf[i].groupId, NULL, &curInfo, &nextInfo); - if (compareStateKey(curInfo.pStateKey,nextInfo.pStateKey)) { - compactStateWindow(pOperator, &curInfo.winInfo, &nextInfo.winInfo, pInfo->pStUpdated, pInfo->pStDeleted); - saveResult(curInfo.winInfo, pInfo->pStUpdated); + bool cpRes = compareWinStateKey(curInfo.pStateKey,nextInfo.pStateKey); + qDebug("===stream=== reload state. next window info %" PRId64 ", %" PRIu64 ", compare:%d", nextInfo.winInfo.sessionWin.win.skey, nextInfo.winInfo.sessionWin.groupId, cpRes); + if (cpRes) { + compactStateWindow(pOperator, &curInfo.winInfo, &nextInfo.winInfo, pInfo->pSeUpdated, pInfo->pSeDeleted); + qDebug("===stream=== reload state. 
save result %" PRId64 ", %" PRIu64, curInfo.winInfo.sessionWin.win.skey, curInfo.winInfo.sessionWin.groupId); + if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE) { + saveResult(curInfo.winInfo, pInfo->pSeUpdated); + } else if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_WINDOW_CLOSE) { + if (!isCloseWindow(&curInfo.winInfo.sessionWin.win, &pInfo->twAggSup)) { + saveDeleteRes(pInfo->pSeDeleted, curInfo.winInfo.sessionWin); + } + SSessionKey key = {0}; + getSessionHashKey(&curInfo.winInfo.sessionWin, &key); + tSimpleHashPut(pAggSup->pResultRows, &key, sizeof(SSessionKey), &curInfo.winInfo, sizeof(SResultWindowInfo)); + } + } else if (IS_VALID_SESSION_WIN(nextInfo.winInfo)) { + releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)nextInfo.winInfo.pOutputBuf, &pAggSup->pSessionAPI->stateStore); } if (IS_VALID_SESSION_WIN(curInfo.winInfo)) { - releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)curInfo.winInfo.pOutputBuf, &pAggSup->pSessionAPI->stateStore); - } - - if (IS_VALID_SESSION_WIN(nextInfo.winInfo)) { - releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)nextInfo.winInfo.pOutputBuf, &pAggSup->pSessionAPI->stateStore); + saveSessionOutputBuf(pAggSup, &curInfo.winInfo); } } taosMemoryFree(pBuf); @@ -4606,6 +4665,7 @@ static void doMergeAlignedIntervalAgg(SOperatorInfo* pOperator) { finalizeResultRows(pIaInfo->aggSup.pResultBuf, &pResultRowInfo->cur, pSup, pRes, pTaskInfo); resetResultRow(pMiaInfo->pResultRow, pIaInfo->aggSup.resultRowSize - sizeof(SResultRow)); cleanupAfterGroupResultGen(pMiaInfo, pRes); + doFilter(pRes, pOperator->exprSupp.pFilterInfo, NULL); } setOperatorCompleted(pOperator); @@ -4626,6 +4686,7 @@ static void doMergeAlignedIntervalAgg(SOperatorInfo* pOperator) { pMiaInfo->prefetchedBlock = pBlock; cleanupAfterGroupResultGen(pMiaInfo, pRes); + doFilter(pRes, pOperator->exprSupp.pFilterInfo, NULL); break; } else { // continue @@ -4855,7 +4916,7 @@ static void doMergeIntervalAggImpl(SOperatorInfo* pOperatorInfo, SResultRowInfo* 
doWindowBorderInterpolation(iaInfo, pBlock, pResult, &win, startPos, forwardRows, pExprSup); } - updateTimeWindowInfo(&iaInfo->twAggSup.timeWindowData, &win, true); + updateTimeWindowInfo(&iaInfo->twAggSup.timeWindowData, &win, 1); applyAggFunctionOnPartialTuples(pTaskInfo, pExprSup->pCtx, &iaInfo->twAggSup.timeWindowData, startPos, forwardRows, pBlock->info.rows, numOfOutput); doCloseWindow(pResultRowInfo, iaInfo, pResult); diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index d26db6536facc3d36e9de03afefbf2dc923e32af..0a8d7ee376b9a92a7c4bc5fb7544de0f6bb77030 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -24,6 +24,7 @@ #include "tpagedbuf.h" #include "tsort.h" #include "tutil.h" +#include "tsimplehash.h" struct STupleHandle { SSDataBlock* pBlock; @@ -42,13 +43,16 @@ struct SSortHandle { int64_t startTs; uint64_t totalElapsed; - uint64_t maxRows; - uint32_t maxTupleLength; - uint32_t sortBufSize; + uint64_t pqMaxRows; + uint32_t pqMaxTupleLength; + uint32_t pqSortBufSize; bool forceUsePQSort; BoundedQueue* pBoundedQueue; uint32_t tmpRowIdx; + int64_t mergeLimit; + int64_t currMergeLimitTs; + int32_t sourceId; SSDataBlock* pDataBlock; SMsortComparParam cmpParam; @@ -173,8 +177,8 @@ void destroyTuple(void* t) { * @return */ SSortHandle* tsortCreateSortHandle(SArray* pSortInfo, int32_t type, int32_t pageSize, int32_t numOfPages, - SSDataBlock* pBlock, const char* idstr, uint64_t maxRows, uint32_t maxTupleLength, - uint32_t sortBufSize) { + SSDataBlock* pBlock, const char* idstr, uint64_t pqMaxRows, uint32_t pqMaxTupleLength, + uint32_t pqSortBufSize) { SSortHandle* pSortHandle = taosMemoryCalloc(1, sizeof(SSortHandle)); pSortHandle->type = type; @@ -183,10 +187,10 @@ SSortHandle* tsortCreateSortHandle(SArray* pSortInfo, int32_t type, int32_t page pSortHandle->pSortInfo = pSortInfo; pSortHandle->loops = 0; - pSortHandle->maxTupleLength = maxTupleLength; - if (maxRows != 0) { - 
pSortHandle->sortBufSize = sortBufSize; - pSortHandle->maxRows = maxRows; + pSortHandle->pqMaxTupleLength = pqMaxTupleLength; + if (pqMaxRows != 0) { + pSortHandle->pqSortBufSize = pqSortBufSize; + pSortHandle->pqMaxRows = pqMaxRows; } pSortHandle->forceUsePQSort = false; @@ -194,10 +198,18 @@ SSortHandle* tsortCreateSortHandle(SArray* pSortInfo, int32_t type, int32_t page pSortHandle->pDataBlock = createOneDataBlock(pBlock, false); } + pSortHandle->mergeLimit = -1; + pSortHandle->pOrderedSource = taosArrayInit(4, POINTER_BYTES); pSortHandle->cmpParam.orderInfo = pSortInfo; pSortHandle->cmpParam.cmpGroupId = false; - + pSortHandle->cmpParam.sortType = type; + if (type == SORT_BLOCK_TS_MERGE) { + SBlockOrderInfo* pOrder = TARRAY_GET_ELEM(pSortInfo, 0); + pSortHandle->cmpParam.tsSlotId = pOrder->slotId; + pSortHandle->cmpParam.order = pOrder->order; + pSortHandle->cmpParam.cmpFn = (pOrder->order == TSDB_ORDER_ASC) ? compareInt64Val : compareInt64ValDesc; + } tsortSetComparFp(pSortHandle, msortComparFn); if (idstr != NULL) { @@ -469,11 +481,14 @@ static int32_t adjustMergeTreeForNextTuple(SSortSource* pSource, SMultiwayMergeT if (pHandle->type == SORT_SINGLESOURCE_SORT) { pSource->pageIndex++; if (pSource->pageIndex >= taosArrayGetSize(pSource->pageIdList)) { + qDebug("adjust merge tree. 
%d source completed %d", *numOfCompleted, pSource->pageIndex); (*numOfCompleted) += 1; pSource->src.rowIndex = -1; pSource->pageIndex = -1; pSource->src.pBlock = blockDataDestroy(pSource->src.pBlock); } else { + if (pSource->pageIndex % 512 == 0) qDebug("begin source %p page %d", pSource, pSource->pageIndex); + int32_t* pPgId = taosArrayGet(pSource->pageIdList, pSource->pageIndex); void* pPage = getBufPage(pHandle->pBuf, *pPgId); @@ -486,7 +501,6 @@ static int32_t adjustMergeTreeForNextTuple(SSortSource* pSource, SMultiwayMergeT if (code != TSDB_CODE_SUCCESS) { return code; } - releaseBufPage(pHandle->pBuf, pPage); } } else { @@ -497,6 +511,7 @@ static int32_t adjustMergeTreeForNextTuple(SSortSource* pSource, SMultiwayMergeT if (pSource->src.pBlock == NULL) { (*numOfCompleted) += 1; pSource->src.rowIndex = -1; + qDebug("adjust merge tree. %d source completed", *numOfCompleted); } } } @@ -577,53 +592,63 @@ int32_t msortComparFn(const void* pLeft, const void* pRight, void* param) { } } - for (int32_t i = 0; i < pInfo->size; ++i) { - SBlockOrderInfo* pOrder = TARRAY_GET_ELEM(pInfo, i); - SColumnInfoData* pLeftColInfoData = TARRAY_GET_ELEM(pLeftBlock->pDataBlock, pOrder->slotId); + if (pParam->sortType == SORT_BLOCK_TS_MERGE) { + SColumnInfoData* pLeftColInfoData = TARRAY_GET_ELEM(pLeftBlock->pDataBlock, pParam->tsSlotId); + SColumnInfoData* pRightColInfoData = TARRAY_GET_ELEM(pRightBlock->pDataBlock, pParam->tsSlotId); + int64_t* left1 = (int64_t*)(pLeftColInfoData->pData) + pLeftSource->src.rowIndex; + int64_t* right1 = (int64_t*)(pRightColInfoData->pData) + pRightSource->src.rowIndex; - bool leftNull = false; - if (pLeftColInfoData->hasNull) { - if (pLeftBlock->pBlockAgg == NULL) { - leftNull = colDataIsNull_s(pLeftColInfoData, pLeftSource->src.rowIndex); - } else { - leftNull = - colDataIsNull(pLeftColInfoData, pLeftBlock->info.rows, pLeftSource->src.rowIndex, pLeftBlock->pBlockAgg[i]); + int ret = pParam->cmpFn(left1, right1); + return ret; + } else { + for 
(int32_t i = 0; i < pInfo->size; ++i) { + SBlockOrderInfo* pOrder = TARRAY_GET_ELEM(pInfo, i); + SColumnInfoData* pLeftColInfoData = TARRAY_GET_ELEM(pLeftBlock->pDataBlock, pOrder->slotId); + SColumnInfoData* pRightColInfoData = TARRAY_GET_ELEM(pRightBlock->pDataBlock, pOrder->slotId); + + bool leftNull = false; + if (pLeftColInfoData->hasNull) { + if (pLeftBlock->pBlockAgg == NULL) { + leftNull = colDataIsNull_s(pLeftColInfoData, pLeftSource->src.rowIndex); + } else { + leftNull = colDataIsNull(pLeftColInfoData, pLeftBlock->info.rows, pLeftSource->src.rowIndex, + pLeftBlock->pBlockAgg[i]); + } } - } - SColumnInfoData* pRightColInfoData = TARRAY_GET_ELEM(pRightBlock->pDataBlock, pOrder->slotId); - bool rightNull = false; - if (pRightColInfoData->hasNull) { - if (pRightBlock->pBlockAgg == NULL) { - rightNull = colDataIsNull_s(pRightColInfoData, pRightSource->src.rowIndex); - } else { - rightNull = colDataIsNull(pRightColInfoData, pRightBlock->info.rows, pRightSource->src.rowIndex, - pRightBlock->pBlockAgg[i]); + bool rightNull = false; + if (pRightColInfoData->hasNull) { + if (pRightBlock->pBlockAgg == NULL) { + rightNull = colDataIsNull_s(pRightColInfoData, pRightSource->src.rowIndex); + } else { + rightNull = colDataIsNull(pRightColInfoData, pRightBlock->info.rows, pRightSource->src.rowIndex, + pRightBlock->pBlockAgg[i]); + } } - } - if (leftNull && rightNull) { - continue; // continue to next slot - } + if (leftNull && rightNull) { + continue; // continue to next slot + } - if (rightNull) { - return pOrder->nullFirst ? 1 : -1; - } + if (rightNull) { + return pOrder->nullFirst ? 1 : -1; + } - if (leftNull) { - return pOrder->nullFirst ? -1 : 1; - } + if (leftNull) { + return pOrder->nullFirst ? 
-1 : 1; + } - void* left1 = colDataGetData(pLeftColInfoData, pLeftSource->src.rowIndex); - void* right1 = colDataGetData(pRightColInfoData, pRightSource->src.rowIndex); + void* left1 = colDataGetData(pLeftColInfoData, pLeftSource->src.rowIndex); + void* right1 = colDataGetData(pRightColInfoData, pRightSource->src.rowIndex); - __compar_fn_t fn = getKeyComparFunc(pLeftColInfoData->info.type, pOrder->order); + __compar_fn_t fn = getKeyComparFunc(pLeftColInfoData->info.type, pOrder->order); - int ret = fn(left1, right1); - if (ret == 0) { - continue; - } else { - return ret; + int ret = fn(left1, right1); + if (ret == 0) { + continue; + } else { + return ret; + } } } return 0; @@ -668,6 +693,7 @@ static int32_t doInternalMergeSort(SSortHandle* pHandle) { // Only *numOfInputSources* can be loaded into buffer to perform the external sort. for (int32_t i = 0; i < sortGroup; ++i) { + qDebug("internal merge sort pass %d group %d. num input sources %d ", t, i, numOfInputSources); pHandle->sourceId += 1; int32_t end = (i + 1) * numOfInputSources - 1; @@ -690,13 +716,15 @@ static int32_t doInternalMergeSort(SSortHandle* pHandle) { return code; } + int nMergedRows = 0; + SArray* pPageIdList = taosArrayInit(4, sizeof(int32_t)); while (1) { if (tsortIsClosed(pHandle)) { code = terrno = TSDB_CODE_TSC_QUERY_CANCELLED; return code; } - + SSDataBlock* pDataBlock = getSortedBlockDataInner(pHandle, &pHandle->cmpParam, numOfRows); if (pDataBlock == NULL) { break; @@ -720,8 +748,12 @@ static int32_t doInternalMergeSort(SSortHandle* pHandle) { setBufPageDirty(pPage, true); releaseBufPage(pHandle->pBuf, pPage); + nMergedRows += pDataBlock->info.rows; blockDataCleanup(pDataBlock); + if ((pHandle->mergeLimit != -1) && (nMergedRows >= pHandle->mergeLimit)) { + break; + } } sortComparCleanup(&pHandle->cmpParam); @@ -769,114 +801,395 @@ int32_t getProperSortPageSize(size_t rowSize, uint32_t numOfCols) { return pgSize; } -static int32_t createInitialSources(SSortHandle* pHandle) { - size_t 
sortBufSize = pHandle->numOfPages * pHandle->pageSize; - int32_t code = 0; +static int32_t createPageBuf(SSortHandle* pHandle) { + if (pHandle->pBuf == NULL) { + if (!osTempSpaceAvailable()) { + terrno = TSDB_CODE_NO_DISKSPACE; + qError("create page buf failed since %s, tempDir:%s", terrstr(), tsTempDir); + return terrno; + } - if (pHandle->type == SORT_SINGLESOURCE_SORT) { - SSortSource** pSource = taosArrayGet(pHandle->pOrderedSource, 0); - SSortSource* source = *pSource; - *pSource = NULL; + int32_t code = createDiskbasedBuf(&pHandle->pBuf, pHandle->pageSize, pHandle->numOfPages * pHandle->pageSize, + "tableBlocksBuf", tsTempDir); + dBufSetPrintInfo(pHandle->pBuf); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + return 0; +} - tsortClearOrderdSource(pHandle->pOrderedSource, NULL, NULL); +typedef struct SBlkMergeSupport { + int64_t** aTs; + int32_t* aRowIdx; + int32_t order; +} SBlkMergeSupport; - while (1) { - SSDataBlock* pBlock = pHandle->fetchfp(source->param); - if (pBlock == NULL) { - break; - } +static int32_t blockCompareTsFn(const void* pLeft, const void* pRight, void* param) { + int32_t left = *(int32_t*)pLeft; + int32_t right = *(int32_t*)pRight; - if (pHandle->pDataBlock == NULL) { - uint32_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); - pHandle->pageSize = getProperSortPageSize(blockDataGetRowSize(pBlock), numOfCols); + SBlkMergeSupport* pSup = (SBlkMergeSupport*)param; + if (pSup->aRowIdx[left] == -1) { + return 1; + } else if (pSup->aRowIdx[right] == -1) { + return -1; + } - // todo, number of pages are set according to the total available sort buffer - pHandle->numOfPages = 1024; - sortBufSize = pHandle->numOfPages * pHandle->pageSize; - pHandle->pDataBlock = createOneDataBlock(pBlock, false); - } + int64_t leftTs = pSup->aTs[left][pSup->aRowIdx[left]]; + int64_t rightTs = pSup->aTs[right][pSup->aRowIdx[right]]; - if (pHandle->beforeFp != NULL) { - pHandle->beforeFp(pBlock, pHandle->param); - } + int32_t ret = leftTs>rightTs ? 
1 : ((leftTs < rightTs) ? -1 : 0); + if (pSup->order == TSDB_ORDER_DESC) { + ret = -1 * ret; + } + return ret; +} - code = blockDataMerge(pHandle->pDataBlock, pBlock); - if (code != TSDB_CODE_SUCCESS) { - if (source->param && !source->onlyRef) { - taosMemoryFree(source->param); +static int32_t appendDataBlockToPageBuf(SSortHandle* pHandle, SSDataBlock* blk, SArray* aPgId) { + int32_t pageId = -1; + void* pPage = getNewBufPage(pHandle->pBuf, &pageId); + taosArrayPush(aPgId, &pageId); + + int32_t size = blockDataGetSize(blk) + sizeof(int32_t) + taosArrayGetSize(blk->pDataBlock) * sizeof(int32_t); + ASSERT(size <= getBufPageSize(pHandle->pBuf)); + + blockDataToBuf(pPage, blk); + + setBufPageDirty(pPage, true); + releaseBufPage(pHandle->pBuf, pPage); + + return 0; +} + +static int32_t getPageBufIncForRow(SSDataBlock* blk, int32_t row, int32_t rowIdxInPage) { + int sz = 0; + int numCols = taosArrayGetSize(blk->pDataBlock); + if (!blk->info.hasVarCol) { + sz += numCols * ((rowIdxInPage & 0x7) == 0 ? 
1: 0); + sz += blockDataGetRowSize(blk); + } else { + for (int32_t i = 0; i < numCols; ++i) { + SColumnInfoData* pColInfoData = TARRAY_GET_ELEM(blk->pDataBlock, i); + if (IS_VAR_DATA_TYPE(pColInfoData->info.type)) { + if (pColInfoData->varmeta.offset[row] != -1) { + char* p = colDataGetData(pColInfoData, row); + sz += varDataTLen(p); } - if (!source->onlyRef && source->src.pBlock) { - blockDataDestroy(source->src.pBlock); - source->src.pBlock = NULL; + + sz += sizeof(pColInfoData->varmeta.offset[0]); + } else { + sz += pColInfoData->info.bytes; + + if (((rowIdxInPage) & 0x07) == 0) { + sz += 1; // bitmap } - taosMemoryFree(source); - return code; } + } + } + return sz; +} + +static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SBlockOrderInfo* order, SArray* aExtSrc) { + int pgHeaderSz = sizeof(int32_t) + sizeof(int32_t) * taosArrayGetSize(pHandle->pDataBlock->pDataBlock); + int32_t rowCap = blockDataGetCapacityInRow(pHandle->pDataBlock, pHandle->pageSize, pgHeaderSz); + blockDataEnsureCapacity(pHandle->pDataBlock, rowCap); + blockDataCleanup(pHandle->pDataBlock); + int32_t numBlks = taosArrayGetSize(aBlk); + + SBlkMergeSupport sup; + sup.aRowIdx = taosMemoryCalloc(numBlks, sizeof(int32_t)); + sup.aTs = taosMemoryCalloc(numBlks, sizeof(int64_t*)); + sup.order = order->order; + for (int i = 0; i < numBlks; ++i) { + SSDataBlock* blk = taosArrayGetP(aBlk, i); + SColumnInfoData* col = taosArrayGet(blk->pDataBlock, order->slotId); + sup.aTs[i] = (int64_t*)col->pData; + sup.aRowIdx[i] = 0; + } + + int32_t totalRows = 0; + for (int i = 0; i < numBlks; ++i) { + SSDataBlock* blk = taosArrayGetP(aBlk, i); + totalRows += blk->info.rows; + } - size_t size = blockDataGetSize(pHandle->pDataBlock); - if (size > sortBufSize) { - // Perform the in-memory sort and then flush data in the buffer into disk. 
- int64_t p = taosGetTimestampUs(); - code = blockDataSort(pHandle->pDataBlock, pHandle->pSortInfo); - if (code != 0) { - if (source->param && !source->onlyRef) { - taosMemoryFree(source->param); + SArray* aPgId = taosArrayInit(8, sizeof(int32_t)); + + SMultiwayMergeTreeInfo* pTree = NULL; + tMergeTreeCreate(&pTree, taosArrayGetSize(aBlk), &sup, blockCompareTsFn); + int32_t nRows = 0; + int32_t nMergedRows = 0; + bool mergeLimitReached = false; + size_t blkPgSz = pgHeaderSz; + int64_t lastPageBufTs = (order->order == TSDB_ORDER_ASC) ? INT64_MAX : INT64_MIN; + int64_t currTs = (order->order == TSDB_ORDER_ASC) ? INT64_MAX : INT64_MIN; + while (nRows < totalRows) { + int32_t minIdx = tMergeTreeGetChosenIndex(pTree); + SSDataBlock* minBlk = taosArrayGetP(aBlk, minIdx); + int32_t minRow = sup.aRowIdx[minIdx]; + int32_t bufInc = getPageBufIncForRow(minBlk, minRow, pHandle->pDataBlock->info.rows); + + if (blkPgSz <= pHandle->pageSize && blkPgSz + bufInc > pHandle->pageSize) { + SColumnInfoData* tsCol = taosArrayGet(pHandle->pDataBlock->pDataBlock, order->slotId); + lastPageBufTs = ((int64_t*)tsCol->pData)[pHandle->pDataBlock->info.rows - 1]; + appendDataBlockToPageBuf(pHandle, pHandle->pDataBlock, aPgId); + nMergedRows += pHandle->pDataBlock->info.rows; + + blockDataCleanup(pHandle->pDataBlock); + blkPgSz = pgHeaderSz; + bufInc = getPageBufIncForRow(minBlk, minRow, 0); + + if ((pHandle->mergeLimit != -1) && (nMergedRows >= pHandle->mergeLimit)) { + mergeLimitReached = true; + if ((lastPageBufTs < pHandle->currMergeLimitTs && order->order == TSDB_ORDER_ASC) || + (lastPageBufTs > pHandle->currMergeLimitTs && order->order == TSDB_ORDER_DESC)) { + pHandle->currMergeLimitTs = lastPageBufTs; } - if (!source->onlyRef && source->src.pBlock) { - blockDataDestroy(source->src.pBlock); - source->src.pBlock = NULL; + break; + } + } + blockDataEnsureCapacity(pHandle->pDataBlock, pHandle->pDataBlock->info.rows + 1); + appendOneRowToDataBlock(pHandle->pDataBlock, minBlk, &minRow); + 
blkPgSz += bufInc; + + ++nRows; + + if (sup.aRowIdx[minIdx] == minBlk->info.rows - 1) { + sup.aRowIdx[minIdx] = -1; + } else { + ++sup.aRowIdx[minIdx]; + } + tMergeTreeAdjust(pTree, tMergeTreeGetAdjustIndex(pTree)); + } + if (pHandle->pDataBlock->info.rows > 0) { + if (!mergeLimitReached) { + SColumnInfoData* tsCol = taosArrayGet(pHandle->pDataBlock->pDataBlock, order->slotId); + lastPageBufTs = ((int64_t*)tsCol->pData)[pHandle->pDataBlock->info.rows - 1]; + appendDataBlockToPageBuf(pHandle, pHandle->pDataBlock, aPgId); + nMergedRows += pHandle->pDataBlock->info.rows; + if ((pHandle->mergeLimit != -1) && (nMergedRows >= pHandle->mergeLimit)) { + mergeLimitReached = true; + if ((lastPageBufTs < pHandle->currMergeLimitTs && order->order == TSDB_ORDER_ASC) || + (lastPageBufTs > pHandle->currMergeLimitTs && order->order == TSDB_ORDER_DESC)) { + pHandle->currMergeLimitTs = lastPageBufTs; } + } + } + blockDataCleanup(pHandle->pDataBlock); + } + SSDataBlock* pMemSrcBlk = createOneDataBlock(pHandle->pDataBlock, false); + doAddNewExternalMemSource(pHandle->pBuf, aExtSrc, pMemSrcBlk, &pHandle->sourceId, aPgId); - taosMemoryFree(source); - return code; - } + taosMemoryFree(sup.aRowIdx); + taosMemoryFree(sup.aTs); - int64_t el = taosGetTimestampUs() - p; - pHandle->sortElapsed += el; - if (pHandle->maxRows > 0) blockDataKeepFirstNRows(pHandle->pDataBlock, pHandle->maxRows); - code = doAddToBuf(pHandle->pDataBlock, pHandle); - if (code != TSDB_CODE_SUCCESS) { - return code; - } + tMergeTreeDestroy(&pTree); + + return 0; +} + +static int32_t createBlocksMergeSortInitialSources(SSortHandle* pHandle) { + SBlockOrderInfo* pOrder = taosArrayGet(pHandle->pSortInfo, 0); + size_t nSrc = taosArrayGetSize(pHandle->pOrderedSource); + SArray* aExtSrc = taosArrayInit(nSrc, POINTER_BYTES); + + size_t maxBufSize = pHandle->numOfPages * pHandle->pageSize; + createPageBuf(pHandle); + + SSortSource* pSrc = taosArrayGetP(pHandle->pOrderedSource, 0); + int32_t szSort = 0; + + if (pOrder->order == 
TSDB_ORDER_ASC) { + pHandle->currMergeLimitTs = INT64_MAX; + } else { + pHandle->currMergeLimitTs = INT64_MIN; + } + + SArray* aBlkSort = taosArrayInit(8, POINTER_BYTES); + SSHashObj* mUidBlk = tSimpleHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT)); + while (1) { + SSDataBlock* pBlk = pHandle->fetchfp(pSrc->param); + if (pBlk != NULL) { + SColumnInfoData* tsCol = taosArrayGet(pBlk->pDataBlock, pOrder->slotId); + int64_t firstRowTs = *(int64_t*)tsCol->pData; + if ((pOrder->order == TSDB_ORDER_ASC && firstRowTs > pHandle->currMergeLimitTs) || + (pOrder->order == TSDB_ORDER_DESC && firstRowTs < pHandle->currMergeLimitTs)) { + continue; + } + } + if (pBlk != NULL) { + szSort += blockDataGetSize(pBlk); + + void* ppBlk = tSimpleHashGet(mUidBlk, &pBlk->info.id.uid, sizeof(pBlk->info.id.uid)); + if (ppBlk != NULL) { + SSDataBlock* tBlk = *(SSDataBlock**)(ppBlk); + blockDataMerge(tBlk, pBlk); + } else { + SSDataBlock* tBlk = createOneDataBlock(pBlk, true); + tSimpleHashPut(mUidBlk, &pBlk->info.id.uid, sizeof(pBlk->info.id.uid), &tBlk, POINTER_BYTES); + taosArrayPush(aBlkSort, &tBlk); + } + } + + if ((pBlk != NULL && szSort > maxBufSize) || (pBlk == NULL && szSort > 0)) { + tSimpleHashClear(mUidBlk); + + int64_t p = taosGetTimestampUs(); + sortBlocksToExtSource(pHandle, aBlkSort, pOrder, aExtSrc); + int64_t el = taosGetTimestampUs() - p; + pHandle->sortElapsed += el; + + for (int i = 0; i < taosArrayGetSize(aBlkSort); ++i) { + blockDataDestroy(taosArrayGetP(aBlkSort, i)); } + taosArrayClear(aBlkSort); + szSort = 0; + qDebug("source %zu created", taosArrayGetSize(aExtSrc)); } + if (pBlk == NULL) { + break; + } + + if (tsortIsClosed(pHandle)) { + tSimpleHashClear(mUidBlk); + for (int i = 0; i < taosArrayGetSize(aBlkSort); ++i) { + blockDataDestroy(taosArrayGetP(aBlkSort, i)); + } + taosArrayClear(aBlkSort); + break; + } + } + + tSimpleHashCleanup(mUidBlk); + taosArrayDestroy(aBlkSort); + tsortClearOrderdSource(pHandle->pOrderedSource, NULL, NULL); + if 
(!tsortIsClosed(pHandle)) { + taosArrayAddAll(pHandle->pOrderedSource, aExtSrc); + } + taosArrayDestroy(aExtSrc); + + pHandle->type = SORT_SINGLESOURCE_SORT; + return 0; +} + +static int32_t createBlocksQuickSortInitialSources(SSortHandle* pHandle) { + int32_t code = 0; + size_t sortBufSize = pHandle->numOfPages * pHandle->pageSize; + + SSortSource** pSource = taosArrayGet(pHandle->pOrderedSource, 0); + SSortSource* source = *pSource; + *pSource = NULL; + + tsortClearOrderdSource(pHandle->pOrderedSource, NULL, NULL); + + while (1) { + SSDataBlock* pBlock = pHandle->fetchfp(source->param); + if (pBlock == NULL) { + break; + } + + if (pHandle->pDataBlock == NULL) { + uint32_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); + pHandle->pageSize = getProperSortPageSize(blockDataGetRowSize(pBlock), numOfCols); - if (source->param && !source->onlyRef) { - taosMemoryFree(source->param); + // todo, number of pages are set according to the total available sort buffer + pHandle->numOfPages = 1024; + sortBufSize = pHandle->numOfPages * pHandle->pageSize; + pHandle->pDataBlock = createOneDataBlock(pBlock, false); } - taosMemoryFree(source); + if (pHandle->beforeFp != NULL) { + pHandle->beforeFp(pBlock, pHandle->param); + } - if (pHandle->pDataBlock != NULL && pHandle->pDataBlock->info.rows > 0) { - size_t size = blockDataGetSize(pHandle->pDataBlock); + code = blockDataMerge(pHandle->pDataBlock, pBlock); + if (code != TSDB_CODE_SUCCESS) { + if (source->param && !source->onlyRef) { + taosMemoryFree(source->param); + } + if (!source->onlyRef && source->src.pBlock) { + blockDataDestroy(source->src.pBlock); + source->src.pBlock = NULL; + } + taosMemoryFree(source); + return code; + } + size_t size = blockDataGetSize(pHandle->pDataBlock); + if (size > sortBufSize) { // Perform the in-memory sort and then flush data in the buffer into disk. 
int64_t p = taosGetTimestampUs(); - code = blockDataSort(pHandle->pDataBlock, pHandle->pSortInfo); if (code != 0) { + if (source->param && !source->onlyRef) { + taosMemoryFree(source->param); + } + if (!source->onlyRef && source->src.pBlock) { + blockDataDestroy(source->src.pBlock); + source->src.pBlock = NULL; + } + + taosMemoryFree(source); return code; } - if (pHandle->maxRows > 0) blockDataKeepFirstNRows(pHandle->pDataBlock, pHandle->maxRows); int64_t el = taosGetTimestampUs() - p; pHandle->sortElapsed += el; + if (pHandle->pqMaxRows > 0) blockDataKeepFirstNRows(pHandle->pDataBlock, pHandle->pqMaxRows); + code = doAddToBuf(pHandle->pDataBlock, pHandle); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + } - // All sorted data can fit in memory, external memory sort is not needed. Return to directly - if (size <= sortBufSize && pHandle->pBuf == NULL) { - pHandle->cmpParam.numOfSources = 1; - pHandle->inMemSort = true; + if (source->param && !source->onlyRef) { + taosMemoryFree(source->param); + } - pHandle->loops = 1; - pHandle->tupleHandle.rowIndex = -1; - pHandle->tupleHandle.pBlock = pHandle->pDataBlock; - return 0; - } else { - code = doAddToBuf(pHandle->pDataBlock, pHandle); - } + taosMemoryFree(source); + + if (pHandle->pDataBlock != NULL && pHandle->pDataBlock->info.rows > 0) { + size_t size = blockDataGetSize(pHandle->pDataBlock); + + // Perform the in-memory sort and then flush data in the buffer into disk. + int64_t p = taosGetTimestampUs(); + + code = blockDataSort(pHandle->pDataBlock, pHandle->pSortInfo); + if (code != 0) { + return code; + } + + if (pHandle->pqMaxRows > 0) blockDataKeepFirstNRows(pHandle->pDataBlock, pHandle->pqMaxRows); + int64_t el = taosGetTimestampUs() - p; + pHandle->sortElapsed += el; + + // All sorted data can fit in memory, external memory sort is not needed. 
Return to directly + if (size <= sortBufSize && pHandle->pBuf == NULL) { + pHandle->cmpParam.numOfSources = 1; + pHandle->inMemSort = true; + + pHandle->loops = 1; + pHandle->tupleHandle.rowIndex = -1; + pHandle->tupleHandle.pBlock = pHandle->pDataBlock; + return 0; + } else { + code = doAddToBuf(pHandle->pDataBlock, pHandle); } } + return code; +} + +static int32_t createInitialSources(SSortHandle* pHandle) { + int32_t code = 0; + if (pHandle->type == SORT_SINGLESOURCE_SORT) { + code = createBlocksQuickSortInitialSources(pHandle); + } else if (pHandle->type == SORT_BLOCK_TS_MERGE) { + code = createBlocksMergeSortInitialSources(pHandle); + } + qDebug("%zu sources created", taosArrayGetSize(pHandle->pOrderedSource)); return code; } @@ -923,6 +1236,10 @@ void tsortSetClosed(SSortHandle* pHandle) { atomic_store_8(&pHandle->closed, 2); } +void tsortSetMergeLimit(SSortHandle* pHandle, int64_t mergeLimit) { + pHandle->mergeLimit = mergeLimit; +} + int32_t tsortSetFetchRawDataFp(SSortHandle* pHandle, _sort_fetch_block_fn_t fetchFp, void (*fp)(SSDataBlock*, void*), void* param) { pHandle->fetchfp = fetchFp; @@ -1002,8 +1319,8 @@ void tsortSetForceUsePQSort(SSortHandle* pHandle) { static bool tsortIsPQSortApplicable(SSortHandle* pHandle) { if (pHandle->type != SORT_SINGLESOURCE_SORT) return false; if (tsortIsForceUsePQSort(pHandle)) return true; - uint64_t maxRowsFitInMemory = pHandle->sortBufSize / (pHandle->maxTupleLength + sizeof(char*)); - return maxRowsFitInMemory > pHandle->maxRows; + uint64_t maxRowsFitInMemory = pHandle->pqSortBufSize / (pHandle->pqMaxTupleLength + sizeof(char*)); + return maxRowsFitInMemory > pHandle->pqMaxRows; } static bool tsortPQCompFn(void* a, void* b, void* param) { @@ -1049,7 +1366,7 @@ static int32_t tupleComparFn(const void* pLeft, const void* pRight, void* param) } static int32_t tsortOpenForPQSort(SSortHandle* pHandle) { - pHandle->pBoundedQueue = createBoundedQueue(pHandle->maxRows, tsortPQCompFn, destroyTuple, pHandle); + 
pHandle->pBoundedQueue = createBoundedQueue(pHandle->pqMaxRows, tsortPQCompFn, destroyTuple, pHandle); if (NULL == pHandle->pBoundedQueue) return TSDB_CODE_OUT_OF_MEMORY; tsortSetComparFp(pHandle, tupleComparFn); @@ -1100,6 +1417,9 @@ static int32_t tsortOpenForPQSort(SSortHandle* pHandle) { } static STupleHandle* tsortPQSortNextTuple(SSortHandle* pHandle) { + if (pHandle->pDataBlock == NULL) { // when no input stream datablock + return NULL; + } blockDataCleanup(pHandle->pDataBlock); blockDataEnsureCapacity(pHandle->pDataBlock, 1); // abandon the top tuple if queue size bigger than max size diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index 3e16a40575548378f1710af907665cd5c50b90d4..fad8c9ca5bb4869a9d3d869ba6a36d127c0613fa 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -920,6 +920,7 @@ void appendSelectivityValue(SqlFunctionCtx* pCtx, int32_t rowIndex, int32_t pos) void replaceTupleData(STuplePos* pDestPos, STuplePos* pSourcePos) { *pDestPos = *pSourcePos; } +#define COMPARE_MINMAX_DATA(type) (( (*(type*)&pDBuf->v) < (*(type*)&pSBuf->v) ) ^ isMinFunc) int32_t minMaxCombine(SqlFunctionCtx* pDestCtx, SqlFunctionCtx* pSourceCtx, int32_t isMinFunc) { SResultRowEntryInfo* pDResInfo = GET_RES_INFO(pDestCtx); SMinmaxResInfo* pDBuf = GET_ROWCELL_INTERBUF(pDResInfo); @@ -927,18 +928,57 @@ int32_t minMaxCombine(SqlFunctionCtx* pDestCtx, SqlFunctionCtx* pSourceCtx, int3 SResultRowEntryInfo* pSResInfo = GET_RES_INFO(pSourceCtx); SMinmaxResInfo* pSBuf = GET_ROWCELL_INTERBUF(pSResInfo); int16_t type = pDBuf->type == TSDB_DATA_TYPE_NULL ? 
pSBuf->type : pDBuf->type; - if (IS_FLOAT_TYPE(type)) { - if (pSBuf->assign && ((((*(double*)&pDBuf->v) < (*(double*)&pSBuf->v)) ^ isMinFunc) || !pDBuf->assign)) { - *(double*)&pDBuf->v = *(double*)&pSBuf->v; - replaceTupleData(&pDBuf->tuplePos, &pSBuf->tuplePos); - pDBuf->assign = true; - } - } else { - if (pSBuf->assign && (((pDBuf->v < pSBuf->v) ^ isMinFunc) || !pDBuf->assign)) { - pDBuf->v = pSBuf->v; - replaceTupleData(&pDBuf->tuplePos, &pSBuf->tuplePos); - pDBuf->assign = true; + + switch (type) { + case TSDB_DATA_TYPE_DOUBLE: + case TSDB_DATA_TYPE_UBIGINT: + case TSDB_DATA_TYPE_BIGINT: + if (pSBuf->assign && (COMPARE_MINMAX_DATA(int64_t) || !pDBuf->assign)) { + pDBuf->v = pSBuf->v; + replaceTupleData(&pDBuf->tuplePos, &pSBuf->tuplePos); + pDBuf->assign = true; + } + break; + case TSDB_DATA_TYPE_UINT: + case TSDB_DATA_TYPE_INT: + if (pSBuf->assign && (COMPARE_MINMAX_DATA(int32_t) || !pDBuf->assign)) { + pDBuf->v = pSBuf->v; + replaceTupleData(&pDBuf->tuplePos, &pSBuf->tuplePos); + pDBuf->assign = true; + } + break; + case TSDB_DATA_TYPE_USMALLINT: + case TSDB_DATA_TYPE_SMALLINT: + if (pSBuf->assign && (COMPARE_MINMAX_DATA(int16_t) || !pDBuf->assign)) { + pDBuf->v = pSBuf->v; + replaceTupleData(&pDBuf->tuplePos, &pSBuf->tuplePos); + pDBuf->assign = true; + } + break; + case TSDB_DATA_TYPE_BOOL: + case TSDB_DATA_TYPE_UTINYINT: + case TSDB_DATA_TYPE_TINYINT: + if (pSBuf->assign && (COMPARE_MINMAX_DATA(int8_t) || !pDBuf->assign)) { + pDBuf->v = pSBuf->v; + replaceTupleData(&pDBuf->tuplePos, &pSBuf->tuplePos); + pDBuf->assign = true; + } + break; + case TSDB_DATA_TYPE_FLOAT: { + if (pSBuf->assign && (COMPARE_MINMAX_DATA(double) || !pDBuf->assign)) { + pDBuf->v = pSBuf->v; + replaceTupleData(&pDBuf->tuplePos, &pSBuf->tuplePos); + pDBuf->assign = true; + } + break; } + default: + if (pSBuf->assign && (strcmp((char*)&pDBuf->v, (char*)&pSBuf->v) || !pDBuf->assign)) { + pDBuf->v = pSBuf->v; + replaceTupleData(&pDBuf->tuplePos, &pSBuf->tuplePos); + pDBuf->assign = true; 
+ } + break; } pDResInfo->numOfRes = TMAX(pDResInfo->numOfRes, pSResInfo->numOfRes); pDResInfo->isNullRes &= pSResInfo->isNullRes; diff --git a/source/libs/monitor/src/monMain.c b/source/libs/monitor/src/monMain.c index 949e91198adcb69301fbc177b721289d6d736651..56cf0a2b51002f8878eda24d0aaf538e1a6a627f 100644 --- a/source/libs/monitor/src/monMain.c +++ b/source/libs/monitor/src/monMain.c @@ -468,9 +468,6 @@ static void monGenLogJson(SMonInfo *pMonitor) { return; } - SJson *pLogsJson = tjsonAddArrayToObject(pJson, "logs"); - if (pLogsJson == NULL) return; - SMonLogs *logs[6]; logs[0] = &pMonitor->log; logs[1] = &pMonitor->mmInfo.log; @@ -490,22 +487,6 @@ static void monGenLogJson(SMonInfo *pMonitor) { numOfInfoLogs += pLog->numOfInfoLogs; numOfDebugLogs += pLog->numOfDebugLogs; numOfTraceLogs += pLog->numOfTraceLogs; - - for (int32_t i = 0; i < taosArrayGetSize(pLog->logs); ++i) { - SJson *pLogJson = tjsonCreateObject(); - if (pLogJson == NULL) continue; - - SMonLogItem *pLogItem = taosArrayGet(pLog->logs, i); - - char buf[40] = {0}; - taosFormatUtcTime(buf, sizeof(buf), pLogItem->ts, TSDB_TIME_PRECISION_MILLI); - - tjsonAddStringToObject(pLogJson, "ts", buf); - tjsonAddStringToObject(pLogJson, "level", monLogLevelStr(pLogItem->level)); - tjsonAddStringToObject(pLogJson, "content", pLogItem->content); - - if (tjsonAddItemToArray(pLogsJson, pLogJson) != 0) tjsonDelete(pLogJson); - } } SJson *pSummaryJson = tjsonAddArrayToObject(pJson, "summary"); diff --git a/source/libs/nodes/src/nodesCloneFuncs.c b/source/libs/nodes/src/nodesCloneFuncs.c index 6e4dde4ec1752f4bb3349c9c41658ab0140264ae..f5eacf0bd5d1c15bb7c773ee60caea7abc0dc0b5 100644 --- a/source/libs/nodes/src/nodesCloneFuncs.c +++ b/source/libs/nodes/src/nodesCloneFuncs.c @@ -361,6 +361,7 @@ static int32_t logicNodeCopy(const SLogicNode* pSrc, SLogicNode* pDst) { COPY_SCALAR_FIELD(groupAction); COPY_SCALAR_FIELD(inputTsOrder); COPY_SCALAR_FIELD(outputTsOrder); + COPY_SCALAR_FIELD(forceCreateNonBlockingOptr); return 
TSDB_CODE_SUCCESS; } @@ -397,6 +398,7 @@ static int32_t logicScanCopy(const SScanLogicNode* pSrc, SScanLogicNode* pDst) { CLONE_NODE_LIST_FIELD(pTags); CLONE_NODE_FIELD(pSubtable); COPY_SCALAR_FIELD(igLastNull); + COPY_SCALAR_FIELD(groupOrderScan); return TSDB_CODE_SUCCESS; } @@ -545,6 +547,7 @@ static int32_t physiNodeCopy(const SPhysiNode* pSrc, SPhysiNode* pDst) { CLONE_NODE_LIST_FIELD(pChildren); COPY_SCALAR_FIELD(inputTsOrder); COPY_SCALAR_FIELD(outputTsOrder); + COPY_SCALAR_FIELD(forceCreateNonBlockingOptr); return TSDB_CODE_SUCCESS; } @@ -556,6 +559,7 @@ static int32_t physiScanCopy(const SScanPhysiNode* pSrc, SScanPhysiNode* pDst) { COPY_SCALAR_FIELD(suid); COPY_SCALAR_FIELD(tableType); COPY_OBJECT_FIELD(tableName, sizeof(SName)); + COPY_SCALAR_FIELD(groupOrderScan); return TSDB_CODE_SUCCESS; } diff --git a/source/libs/nodes/src/nodesCodeFuncs.c b/source/libs/nodes/src/nodesCodeFuncs.c index 81116a60b06272a8b9be9a7c2438eec4b317f784..f25616065eb064d0160209320ecb744ba6ac23d8 100644 --- a/source/libs/nodes/src/nodesCodeFuncs.c +++ b/source/libs/nodes/src/nodesCodeFuncs.c @@ -1559,6 +1559,7 @@ static const char* jkScanPhysiPlanTableId = "TableId"; static const char* jkScanPhysiPlanSTableId = "STableId"; static const char* jkScanPhysiPlanTableType = "TableType"; static const char* jkScanPhysiPlanTableName = "TableName"; +static const char* jkScanPhysiPlanGroupOrderScan = "GroupOrderScan"; static int32_t physiScanNodeToJson(const void* pObj, SJson* pJson) { const STagScanPhysiNode* pNode = (const STagScanPhysiNode*)pObj; @@ -1582,6 +1583,9 @@ static int32_t physiScanNodeToJson(const void* pObj, SJson* pJson) { if (TSDB_CODE_SUCCESS == code) { code = tjsonAddObject(pJson, jkScanPhysiPlanTableName, nameToJson, &pNode->tableName); } + if (TSDB_CODE_SUCCESS == code) { + code = tjsonAddBoolToObject(pJson, jkScanPhysiPlanGroupOrderScan, pNode->groupOrderScan); + } return code; } @@ -1608,6 +1612,9 @@ static int32_t jsonToPhysiScanNode(const SJson* pJson, void* pObj) { 
if (TSDB_CODE_SUCCESS == code) { code = tjsonToObject(pJson, jkScanPhysiPlanTableName, jsonToName, &pNode->tableName); } + if (TSDB_CODE_SUCCESS == code) { + code = tjsonGetBoolValue(pJson, jkScanPhysiPlanGroupOrderScan, &pNode->groupOrderScan); + } return code; } diff --git a/source/libs/nodes/src/nodesMsgFuncs.c b/source/libs/nodes/src/nodesMsgFuncs.c index 1ca37defa4a76a6b679e85facae10a6cd758fb80..20e829766dafe75aa2372aa1e4147ac9b856ee71 100644 --- a/source/libs/nodes/src/nodesMsgFuncs.c +++ b/source/libs/nodes/src/nodesMsgFuncs.c @@ -1853,7 +1853,8 @@ enum { PHY_NODE_CODE_LIMIT, PHY_NODE_CODE_SLIMIT, PHY_NODE_CODE_INPUT_TS_ORDER, - PHY_NODE_CODE_OUTPUT_TS_ORDER + PHY_NODE_CODE_OUTPUT_TS_ORDER, + PHY_NODE_CODE_FORCE_NONBLOCKING_OPTR }; static int32_t physiNodeToMsg(const void* pObj, STlvEncoder* pEncoder) { @@ -1878,6 +1879,9 @@ static int32_t physiNodeToMsg(const void* pObj, STlvEncoder* pEncoder) { if (TSDB_CODE_SUCCESS == code) { code = tlvEncodeEnum(pEncoder, PHY_NODE_CODE_OUTPUT_TS_ORDER, pNode->outputTsOrder); } + if (TSDB_CODE_SUCCESS == code) { + code = tlvEncodeBool(pEncoder, PHY_NODE_CODE_FORCE_NONBLOCKING_OPTR, pNode->forceCreateNonBlockingOptr); + } return code; } @@ -1910,6 +1914,8 @@ static int32_t msgToPhysiNode(STlvDecoder* pDecoder, void* pObj) { case PHY_NODE_CODE_OUTPUT_TS_ORDER: code = tlvDecodeEnum(pTlv, &pNode->outputTsOrder, sizeof(pNode->outputTsOrder)); break; + case PHY_NODE_CODE_FORCE_NONBLOCKING_OPTR: + code = tlvDecodeBool(pTlv, &pNode->forceCreateNonBlockingOptr); default: break; } @@ -1925,7 +1931,8 @@ enum { PHY_SCAN_CODE_BASE_UID, PHY_SCAN_CODE_BASE_SUID, PHY_SCAN_CODE_BASE_TABLE_TYPE, - PHY_SCAN_CODE_BASE_TABLE_NAME + PHY_SCAN_CODE_BASE_TABLE_NAME, + PHY_SCAN_CODE_BASE_GROUP_ORDER_SCAN }; static int32_t physiScanNodeToMsg(const void* pObj, STlvEncoder* pEncoder) { @@ -1950,6 +1957,9 @@ static int32_t physiScanNodeToMsg(const void* pObj, STlvEncoder* pEncoder) { if (TSDB_CODE_SUCCESS == code) { code = tlvEncodeObj(pEncoder, 
PHY_SCAN_CODE_BASE_TABLE_NAME, nameToMsg, &pNode->tableName); } + if (TSDB_CODE_SUCCESS == code) { + code = tlvEncodeBool(pEncoder, PHY_SCAN_CODE_BASE_GROUP_ORDER_SCAN, pNode->groupOrderScan); + } return code; } @@ -1982,6 +1992,9 @@ static int32_t msgToPhysiScanNode(STlvDecoder* pDecoder, void* pObj) { case PHY_SCAN_CODE_BASE_TABLE_NAME: code = tlvDecodeObjFromTlv(pTlv, msgToName, &pNode->tableName); break; + case PHY_SCAN_CODE_BASE_GROUP_ORDER_SCAN: + code = tlvDecodeBool(pTlv, &pNode->groupOrderScan); + break; default: break; } diff --git a/source/libs/parser/inc/parUtil.h b/source/libs/parser/inc/parUtil.h index 1a4ee3e91a1aa861d06d0e2829ac806963c5b20c..d96bb9bba430a4443a17f92be81fd777c62388bd 100644 --- a/source/libs/parser/inc/parUtil.h +++ b/source/libs/parser/inc/parUtil.h @@ -118,6 +118,12 @@ int32_t getDnodeListFromCache(SParseMetaCache* pMetaCache, SArray** pDnodes); void destoryParseMetaCache(SParseMetaCache* pMetaCache, bool request); SNode* createSelectStmtImpl(bool isDistinct, SNodeList* pProjectionList, SNode* pTable); +/** + * @brief return a - b with overflow check + * @retval val range between [INT64_MIN, INT64_MAX] + */ +int64_t int64SafeSub(int64_t a, int64_t b); + #ifdef __cplusplus } #endif diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 1427ada6da81711ea1753984709bad74b4a10f36..8ce68a5c8c2d1604622c08f8fd49302dcf796639 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -2934,14 +2934,14 @@ static int32_t createMultiResFuncsFromStar(STranslateContext* pCxt, SFunctionNod static int32_t createTags(STranslateContext* pCxt, SNodeList** pOutput) { if (QUERY_NODE_REAL_TABLE != nodeType(((SSelectStmt*)pCxt->pCurrStmt)->pFromTable)) { return generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_TAGS_PC, - "The _TAGS pseudo column can only be used for subtable and supertable queries"); + "The _TAGS pseudo column can only be used for child table 
and super table queries"); } SRealTableNode* pTable = (SRealTableNode*)(((SSelectStmt*)pCxt->pCurrStmt)->pFromTable); const STableMeta* pMeta = pTable->pMeta; if (TSDB_SUPER_TABLE != pMeta->tableType && TSDB_CHILD_TABLE != pMeta->tableType) { return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_TAGS_PC, - "The _TAGS pseudo column can only be used for subtable and supertable queries"); + "The _TAGS pseudo column can only be used for child table and super table queries"); } SSchema* pTagsSchema = getTableTagSchema(pMeta); @@ -3296,23 +3296,25 @@ static int32_t checkFill(STranslateContext* pCxt, SFillNode* pFill, SValueNode* if (NULL == pInterval) { return TSDB_CODE_SUCCESS; } - - int64_t timeRange = TABS(pFill->timeRange.skey - pFill->timeRange.ekey); + int64_t timeRange = 0; int64_t intervalRange = 0; - if (IS_CALENDAR_TIME_DURATION(pInterval->unit)) { - int64_t f = 1; - if (pInterval->unit == 'n') { - f = 30LL * MILLISECOND_PER_DAY; - } else if (pInterval->unit == 'y') { - f = 365LL * MILLISECOND_PER_DAY; - } - intervalRange = pInterval->datum.i * f; - } else { - intervalRange = pInterval->datum.i; - } - - if ((timeRange / intervalRange) >= MAX_INTERVAL_TIME_WINDOW) { - return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_FILL_TIME_RANGE); + if (!pCxt->createStream) { + int64_t res = int64SafeSub(pFill->timeRange.skey, pFill->timeRange.ekey); + timeRange = res < 0 ? res == INT64_MIN ? 
INT64_MAX : -res : res; + if (IS_CALENDAR_TIME_DURATION(pInterval->unit)) { + int64_t f = 1; + if (pInterval->unit == 'n') { + f = 30LL * MILLISECOND_PER_DAY; + } else if (pInterval->unit == 'y') { + f = 365LL * MILLISECOND_PER_DAY; + } + intervalRange = pInterval->datum.i * f; + } else { + intervalRange = pInterval->datum.i; + } + if ((timeRange / intervalRange) >= MAX_INTERVAL_TIME_WINDOW) { + return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_FILL_TIME_RANGE); + } } return TSDB_CODE_SUCCESS; @@ -3413,7 +3415,8 @@ static int32_t checkIntervalWindow(STranslateContext* pCxt, SIntervalWindowNode* if (IS_CALENDAR_TIME_DURATION(pSliding->unit)) { return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_INTER_SLIDING_UNIT); } - if ((pSliding->datum.i < convertTimePrecision(tsMinSlidingTime, TSDB_TIME_PRECISION_MILLI, precision)) || + if ((pSliding->datum.i < + convertTimeFromPrecisionToUnit(tsMinSlidingTime, TSDB_TIME_PRECISION_MILLI, pSliding->unit)) || (pInter->datum.i / pSliding->datum.i > INTERVAL_SLIDING_FACTOR)) { return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_INTER_SLIDING_TOO_SMALL); } @@ -6196,9 +6199,7 @@ static int32_t translateCreateTopic(STranslateContext* pCxt, SCreateTopicStmt* p static int32_t translateDropTopic(STranslateContext* pCxt, SDropTopicStmt* pStmt) { SMDropTopicReq dropReq = {0}; - SName name; - tNameSetDbName(&name, pCxt->pParseCxt->acctId, pStmt->topicName, strlen(pStmt->topicName)); - tNameGetFullDbName(&name, dropReq.name); + snprintf(dropReq.name, sizeof(dropReq.name), "%d.%s", pCxt->pParseCxt->acctId, pStmt->topicName); dropReq.igNotExists = pStmt->ignoreNotExists; return buildCmdMsg(pCxt, TDMT_MND_TMQ_DROP_TOPIC, (FSerializeFunc)tSerializeSMDropTopicReq, &dropReq); @@ -7719,7 +7720,7 @@ static int32_t extractShowCreateTableResultSchema(int32_t* numOfCols, SSchema** } static int32_t extractShowVariablesResultSchema(int32_t* numOfCols, SSchema** pSchema) { - *numOfCols = 2; + *numOfCols = 3; *pSchema = 
taosMemoryCalloc((*numOfCols), sizeof(SSchema)); if (NULL == (*pSchema)) { return TSDB_CODE_OUT_OF_MEMORY; @@ -7733,6 +7734,10 @@ static int32_t extractShowVariablesResultSchema(int32_t* numOfCols, SSchema** pS (*pSchema)[1].bytes = TSDB_CONFIG_VALUE_LEN; strcpy((*pSchema)[1].name, "value"); + (*pSchema)[2].type = TSDB_DATA_TYPE_BINARY; + (*pSchema)[2].bytes = TSDB_CONFIG_SCOPE_LEN; + strcpy((*pSchema)[2].name, "scope"); + return TSDB_CODE_SUCCESS; } diff --git a/source/libs/parser/src/parUtil.c b/source/libs/parser/src/parUtil.c index 263318b92f081ec7bfb817d09a6fe81218567ffc..1c292b1ec4a0377fe72efde544692e4b228243ae 100644 --- a/source/libs/parser/src/parUtil.c +++ b/source/libs/parser/src/parUtil.c @@ -164,6 +164,8 @@ static char* getSyntaxErrFormat(int32_t errCode) { return "%s function is not supported in fill query"; case TSDB_CODE_PAR_INVALID_WINDOW_PC: return "_WSTART, _WEND and _WDURATION can only be used in window query"; + case TSDB_CODE_PAR_INVALID_TAGS_PC: + return "Tags can only applied to super table and child table"; case TSDB_CODE_PAR_WINDOW_NOT_ALLOWED_FUNC: return "%s function is not supported in time window query"; case TSDB_CODE_PAR_STREAM_NOT_ALLOWED_FUNC: @@ -1140,3 +1142,18 @@ void destoryParseMetaCache(SParseMetaCache* pMetaCache, bool request) { taosHashCleanup(pMetaCache->pTableIndex); taosHashCleanup(pMetaCache->pTableCfg); } + +int64_t int64SafeSub(int64_t a, int64_t b) { + int64_t res = (uint64_t)a - (uint64_t)b; + + if (a >= 0 && b < 0) { + if ((uint64_t)res > (uint64_t)INT64_MAX) { + // overflow + res = INT64_MAX; + } + } else if (a < 0 && b > 0 && res >= 0) { + // underflow + res = INT64_MIN; + } + return res; +} diff --git a/source/libs/planner/inc/planInt.h b/source/libs/planner/inc/planInt.h index 82abc5d1a973dbafa820c41e0614e1b2c90a92c3..092fe1741187dcf3706a64a7be64a3032835b44a 100644 --- a/source/libs/planner/inc/planInt.h +++ b/source/libs/planner/inc/planInt.h @@ -43,6 +43,9 @@ int32_t splitLogicPlan(SPlanContext* pCxt, 
SLogicSubplan* pLogicSubplan); int32_t scaleOutLogicPlan(SPlanContext* pCxt, SLogicSubplan* pLogicSubplan, SQueryLogicPlan** pLogicPlan); int32_t createPhysiPlan(SPlanContext* pCxt, SQueryLogicPlan* pLogicPlan, SQueryPlan** pPlan, SArray* pExecNodeList); +bool isPartTableAgg(SAggLogicNode* pAgg); +bool isPartTableWinodw(SWindowLogicNode* pWindow); + #ifdef __cplusplus } #endif diff --git a/source/libs/planner/src/planLogicCreater.c b/source/libs/planner/src/planLogicCreater.c index 713f12e2294c49bb1327728a7fa162d6313e31f2..37c1a288631177b3f5c3690cde8fc18c3a98128e 100644 --- a/source/libs/planner/src/planLogicCreater.c +++ b/source/libs/planner/src/planLogicCreater.c @@ -1033,7 +1033,6 @@ static int32_t createSortLogicNode(SLogicPlanContext* pCxt, SSelectStmt* pSelect pSort->node.resultDataOrder = isPrimaryKeySort(pSelect->pOrderByList) ? (pSort->groupSort ? DATA_ORDER_LEVEL_IN_GROUP : DATA_ORDER_LEVEL_GLOBAL) : DATA_ORDER_LEVEL_NONE; - int32_t code = nodesCollectColumns(pSelect, SQL_CLAUSE_ORDER_BY, NULL, COLLECT_COL_TYPE_ALL, &pSort->node.pTargets); if (TSDB_CODE_SUCCESS == code && NULL == pSort->node.pTargets) { code = nodesListMakeStrictAppend(&pSort->node.pTargets, @@ -1047,6 +1046,7 @@ static int32_t createSortLogicNode(SLogicPlanContext* pCxt, SSelectStmt* pSelect } SNode* pNode = NULL; SOrderByExprNode* firstSortKey = (SOrderByExprNode*)nodesListGetNode(pSort->pSortKeys, 0); + if (isPrimaryKeySort(pSelect->pOrderByList)) pSort->node.outputTsOrder = firstSortKey->order; if (firstSortKey->pExpr->type == QUERY_NODE_COLUMN) { SColumnNode* pCol = (SColumnNode*)firstSortKey->pExpr; int16_t projIdx = 1; diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index 05f478b116518ac04d824c9e812440e5976ded87..5765e304a9940f288329e0bc45f85f76275e8247 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -363,6 +363,18 @@ static void scanPathOptSetScanOrder(EScanOrder scanOrder, 
SScanLogicNode* pScan) } } +static void scanPathOptSetGroupOrderScan(SScanLogicNode* pScan) { + if (pScan->tableType != TSDB_SUPER_TABLE) return; + + if (pScan->node.pParent && nodeType(pScan->node.pParent) == QUERY_NODE_LOGIC_PLAN_AGG) { + SAggLogicNode* pAgg = (SAggLogicNode*)pScan->node.pParent; + bool withSlimit = pAgg->node.pSlimit != NULL || (pAgg->node.pParent && pAgg->node.pParent->pSlimit); + if (withSlimit && isPartTableAgg(pAgg)) { + pScan->groupOrderScan = pAgg->node.forceCreateNonBlockingOptr = true; + } + } +} + static int32_t scanPathOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogicSubplan) { SOsdInfo info = {.scanOrder = SCAN_ORDER_ASC}; int32_t code = scanPathOptMatch(pCxt, pLogicSubplan->pNode, &info); @@ -371,6 +383,7 @@ static int32_t scanPathOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogicSub if (!pCxt->pPlanCxt->streamQuery) { scanPathOptSetScanOrder(info.scanOrder, info.pScan); } + scanPathOptSetGroupOrderScan(info.pScan); } if (TSDB_CODE_SUCCESS == code && (NULL != info.pDsoFuncs || NULL != info.pSdrFuncs)) { info.pScan->dataRequired = scanPathOptGetDataRequired(info.pSdrFuncs); @@ -1168,7 +1181,8 @@ static bool sortPriKeyOptMayBeOptimized(SLogicNode* pNode) { return false; } SSortLogicNode* pSort = (SSortLogicNode*)pNode; - if (!sortPriKeyOptIsPriKeyOrderBy(pSort->pSortKeys) || 1 != LIST_LENGTH(pSort->node.pChildren)) { + if (pSort->skipPKSortOpt || !sortPriKeyOptIsPriKeyOrderBy(pSort->pSortKeys) || + 1 != LIST_LENGTH(pSort->node.pChildren)) { return false; } SNode* pChild; @@ -1181,8 +1195,8 @@ static bool sortPriKeyOptMayBeOptimized(SLogicNode* pNode) { return true; } -static int32_t sortPriKeyOptGetSequencingNodesImpl(SLogicNode* pNode, bool groupSort, bool* pNotOptimize, - SNodeList** pSequencingNodes) { +static int32_t sortPriKeyOptGetSequencingNodesImpl(SLogicNode* pNode, bool groupSort, EOrder sortOrder, + bool* pNotOptimize, SNodeList** pSequencingNodes) { if (NULL != pNode->pLimit || NULL != pNode->pSlimit) { 
*pNotOptimize = false; return TSDB_CODE_SUCCESS; @@ -1199,15 +1213,21 @@ static int32_t sortPriKeyOptGetSequencingNodesImpl(SLogicNode* pNode, bool group } case QUERY_NODE_LOGIC_PLAN_JOIN: { int32_t code = sortPriKeyOptGetSequencingNodesImpl((SLogicNode*)nodesListGetNode(pNode->pChildren, 0), groupSort, - pNotOptimize, pSequencingNodes); + sortOrder, pNotOptimize, pSequencingNodes); if (TSDB_CODE_SUCCESS == code) { code = sortPriKeyOptGetSequencingNodesImpl((SLogicNode*)nodesListGetNode(pNode->pChildren, 1), groupSort, - pNotOptimize, pSequencingNodes); + sortOrder, pNotOptimize, pSequencingNodes); } return code; } - case QUERY_NODE_LOGIC_PLAN_WINDOW: - return nodesListMakeAppend(pSequencingNodes, (SNode*)pNode); + case QUERY_NODE_LOGIC_PLAN_WINDOW: { + SWindowLogicNode* pWindowLogicNode = (SWindowLogicNode*)pNode; + // For interval window, we always apply sortPriKey optimization. + // For session/event/state window, the output ts order will always be ASC. + // If sort order is also asc, we apply optimization, otherwise we keep sort node to get correct output order. 
+ if (pWindowLogicNode->winType == WINDOW_TYPE_INTERVAL || sortOrder == ORDER_ASC) + return nodesListMakeAppend(pSequencingNodes, (SNode*)pNode); + } case QUERY_NODE_LOGIC_PLAN_AGG: case QUERY_NODE_LOGIC_PLAN_PARTITION: *pNotOptimize = true; @@ -1221,23 +1241,25 @@ static int32_t sortPriKeyOptGetSequencingNodesImpl(SLogicNode* pNode, bool group return TSDB_CODE_SUCCESS; } - return sortPriKeyOptGetSequencingNodesImpl((SLogicNode*)nodesListGetNode(pNode->pChildren, 0), groupSort, + return sortPriKeyOptGetSequencingNodesImpl((SLogicNode*)nodesListGetNode(pNode->pChildren, 0), groupSort, sortOrder, pNotOptimize, pSequencingNodes); } -static int32_t sortPriKeyOptGetSequencingNodes(SLogicNode* pNode, bool groupSort, SNodeList** pSequencingNodes) { +static EOrder sortPriKeyOptGetPriKeyOrder(SSortLogicNode* pSort) { + return ((SOrderByExprNode*)nodesListGetNode(pSort->pSortKeys, 0))->order; +} + +static int32_t sortPriKeyOptGetSequencingNodes(SSortLogicNode* pSort, bool groupSort, SNodeList** pSequencingNodes) { bool notOptimize = false; - int32_t code = sortPriKeyOptGetSequencingNodesImpl(pNode, groupSort, ¬Optimize, pSequencingNodes); + int32_t code = + sortPriKeyOptGetSequencingNodesImpl((SLogicNode*)nodesListGetNode(pSort->node.pChildren, 0), groupSort, + sortPriKeyOptGetPriKeyOrder(pSort), ¬Optimize, pSequencingNodes); if (TSDB_CODE_SUCCESS != code || notOptimize) { NODES_CLEAR_LIST(*pSequencingNodes); } return code; } -static EOrder sortPriKeyOptGetPriKeyOrder(SSortLogicNode* pSort) { - return ((SOrderByExprNode*)nodesListGetNode(pSort->pSortKeys, 0))->order; -} - static int32_t sortPriKeyOptApply(SOptimizeContext* pCxt, SLogicSubplan* pLogicSubplan, SSortLogicNode* pSort, SNodeList* pSequencingNodes) { EOrder order = sortPriKeyOptGetPriKeyOrder(pSort); @@ -1276,10 +1298,17 @@ static int32_t sortPriKeyOptApply(SOptimizeContext* pCxt, SLogicSubplan* pLogicS static int32_t sortPrimaryKeyOptimizeImpl(SOptimizeContext* pCxt, SLogicSubplan* pLogicSubplan, SSortLogicNode* 
pSort) { SNodeList* pSequencingNodes = NULL; - int32_t code = sortPriKeyOptGetSequencingNodes((SLogicNode*)nodesListGetNode(pSort->node.pChildren, 0), - pSort->groupSort, &pSequencingNodes); - if (TSDB_CODE_SUCCESS == code && NULL != pSequencingNodes) { - code = sortPriKeyOptApply(pCxt, pLogicSubplan, pSort, pSequencingNodes); + int32_t code = sortPriKeyOptGetSequencingNodes(pSort, pSort->groupSort, &pSequencingNodes); + if (TSDB_CODE_SUCCESS == code) { + if (pSequencingNodes != NULL) { + code = sortPriKeyOptApply(pCxt, pLogicSubplan, pSort, pSequencingNodes); + } else { + // if we decided not to push down sort info to children, we should propagate output ts order to parents of pSort + optSetParentOrder(pSort->node.pParent, sortPriKeyOptGetPriKeyOrder(pSort), 0); + // we need to prevent this pSort from being chosen to do optimization again + pSort->skipPKSortOpt = true; + pCxt->optimized = true; + } } nodesClearList(pSequencingNodes); return code; @@ -1675,6 +1704,7 @@ static int32_t partTagsOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogicSub if (TSDB_CODE_SUCCESS == code) { if (QUERY_NODE_LOGIC_PLAN_AGG == pNode->pParent->type) { SAggLogicNode* pParent = (SAggLogicNode*)(pNode->pParent); + scanPathOptSetGroupOrderScan(pScan); pParent->hasGroupKeyOptimized = true; } diff --git a/source/libs/planner/src/planPhysiCreater.c b/source/libs/planner/src/planPhysiCreater.c index b3d94a5e47aef06960954b88c612b9568f0e45d4..1b92dcd2e75acc6915c46ec2e509939c2189bc55 100644 --- a/source/libs/planner/src/planPhysiCreater.c +++ b/source/libs/planner/src/planPhysiCreater.c @@ -447,6 +447,7 @@ static int32_t createScanPhysiNodeFinalize(SPhysiPlanContext* pCxt, SSubplan* pS pScanPhysiNode->uid = pScanLogicNode->tableId; pScanPhysiNode->suid = pScanLogicNode->stableId; pScanPhysiNode->tableType = pScanLogicNode->tableType; + pScanPhysiNode->groupOrderScan = pScanLogicNode->groupOrderScan; memcpy(&pScanPhysiNode->tableName, &pScanLogicNode->tableName, sizeof(SName)); if (NULL != 
pScanLogicNode->pTagCond) { pSubplan->pTagCond = nodesCloneNode(pScanLogicNode->pTagCond); @@ -880,6 +881,7 @@ static int32_t createAggPhysiNode(SPhysiPlanContext* pCxt, SNodeList* pChildren, pAgg->mergeDataBlock = (GROUP_ACTION_KEEP == pAggLogicNode->node.groupAction ? false : true); pAgg->groupKeyOptimized = pAggLogicNode->hasGroupKeyOptimized; + pAgg->node.forceCreateNonBlockingOptr = pAggLogicNode->node.forceCreateNonBlockingOptr; SNodeList* pPrecalcExprs = NULL; SNodeList* pGroupKeys = NULL; diff --git a/source/libs/planner/src/planSpliter.c b/source/libs/planner/src/planSpliter.c index 246ee13fb00aa7d30857e63a03f18262ffb10510..84a486649efe4265794d24adf1bde7a295c779d7 100644 --- a/source/libs/planner/src/planSpliter.c +++ b/source/libs/planner/src/planSpliter.c @@ -306,54 +306,6 @@ static bool stbSplIsTableCountQuery(SLogicNode* pNode) { return QUERY_NODE_LOGIC_PLAN_SCAN == nodeType(pChild) && SCAN_TYPE_TABLE_COUNT == ((SScanLogicNode*)pChild)->scanType; } -static SNodeList* stbSplGetPartKeys(SLogicNode* pNode) { - if (QUERY_NODE_LOGIC_PLAN_SCAN == nodeType(pNode)) { - return ((SScanLogicNode*)pNode)->pGroupTags; - } else if (QUERY_NODE_LOGIC_PLAN_PARTITION == nodeType(pNode)) { - return ((SPartitionLogicNode*)pNode)->pPartitionKeys; - } else { - return NULL; - } -} - -static bool stbSplHasPartTbname(SNodeList* pPartKeys) { - if (NULL == pPartKeys) { - return false; - } - SNode* pPartKey = NULL; - FOREACH(pPartKey, pPartKeys) { - if (QUERY_NODE_GROUPING_SET == nodeType(pPartKey)) { - pPartKey = nodesListGetNode(((SGroupingSetNode*)pPartKey)->pParameterList, 0); - } - if ((QUERY_NODE_FUNCTION == nodeType(pPartKey) && FUNCTION_TYPE_TBNAME == ((SFunctionNode*)pPartKey)->funcType) || - (QUERY_NODE_COLUMN == nodeType(pPartKey) && COLUMN_TYPE_TBNAME == ((SColumnNode*)pPartKey)->colType)) { - return true; - } - } - return false; -} - -static bool stbSplNotSystemScan(SLogicNode* pNode) { - if (QUERY_NODE_LOGIC_PLAN_SCAN == nodeType(pNode)) { - return 
SCAN_TYPE_SYSTEM_TABLE != ((SScanLogicNode*)pNode)->scanType; - } else if (QUERY_NODE_LOGIC_PLAN_PARTITION == nodeType(pNode)) { - return stbSplNotSystemScan((SLogicNode*)nodesListGetNode(pNode->pChildren, 0)); - } else { - return true; - } -} - -static bool stbSplIsPartTableAgg(SAggLogicNode* pAgg) { - if (1 != LIST_LENGTH(pAgg->node.pChildren)) { - return false; - } - if (NULL != pAgg->pGroupKeys) { - return stbSplHasPartTbname(pAgg->pGroupKeys) && - stbSplNotSystemScan((SLogicNode*)nodesListGetNode(pAgg->node.pChildren, 0)); - } - return stbSplHasPartTbname(stbSplGetPartKeys((SLogicNode*)nodesListGetNode(pAgg->node.pChildren, 0))); -} - static bool stbSplNeedSplit(bool streamQuery, SLogicNode* pNode) { switch (nodeType(pNode)) { case QUERY_NODE_LOGIC_PLAN_SCAN: @@ -364,7 +316,7 @@ static bool stbSplNeedSplit(bool streamQuery, SLogicNode* pNode) { return streamQuery ? false : stbSplIsMultiTbScanChild(streamQuery, pNode); case QUERY_NODE_LOGIC_PLAN_AGG: return (!stbSplHasGatherExecFunc(((SAggLogicNode*)pNode)->pAggFuncs) || - stbSplIsPartTableAgg((SAggLogicNode*)pNode)) && + isPartTableAgg((SAggLogicNode*)pNode)) && stbSplHasMultiTbScan(streamQuery, pNode) && !stbSplIsTableCountQuery(pNode); case QUERY_NODE_LOGIC_PLAN_WINDOW: return stbSplNeedSplitWindow(streamQuery, pNode); @@ -778,10 +730,6 @@ static int32_t stbSplSplitEvent(SSplitContext* pCxt, SStableSplitInfo* pInfo) { } } -static bool stbSplIsPartTableWinodw(SWindowLogicNode* pWindow) { - return stbSplHasPartTbname(stbSplGetPartKeys((SLogicNode*)nodesListGetNode(pWindow->node.pChildren, 0))); -} - static int32_t stbSplSplitWindowForCrossTable(SSplitContext* pCxt, SStableSplitInfo* pInfo) { switch (((SWindowLogicNode*)pInfo->pSplitNode)->winType) { case WINDOW_TYPE_INTERVAL: @@ -834,7 +782,7 @@ static int32_t stbSplSplitWindowForPartTable(SSplitContext* pCxt, SStableSplitIn } static int32_t stbSplSplitWindowNode(SSplitContext* pCxt, SStableSplitInfo* pInfo) { - if 
(stbSplIsPartTableWinodw((SWindowLogicNode*)pInfo->pSplitNode)) { + if (isPartTableWinodw((SWindowLogicNode*)pInfo->pSplitNode)) { return stbSplSplitWindowForPartTable(pCxt, pInfo); } else { return stbSplSplitWindowForCrossTable(pCxt, pInfo); @@ -920,7 +868,7 @@ static int32_t stbSplSplitAggNodeForCrossTable(SSplitContext* pCxt, SStableSplit } static int32_t stbSplSplitAggNode(SSplitContext* pCxt, SStableSplitInfo* pInfo) { - if (stbSplIsPartTableAgg((SAggLogicNode*)pInfo->pSplitNode)) { + if (isPartTableAgg((SAggLogicNode*)pInfo->pSplitNode)) { return stbSplSplitAggNodeForPartTable(pCxt, pInfo); } return stbSplSplitAggNodeForCrossTable(pCxt, pInfo); diff --git a/source/libs/planner/src/planUtil.c b/source/libs/planner/src/planUtil.c index 29e87b34ce046d166678ab54ae5095d6e9a859fe..88086cde1d0edb91e2918a26935495be0b3120ce 100644 --- a/source/libs/planner/src/planUtil.c +++ b/source/libs/planner/src/planUtil.c @@ -321,3 +321,57 @@ int32_t adjustLogicNodeDataRequirement(SLogicNode* pNode, EDataOrderLevel requir } return code; } + +static bool stbNotSystemScan(SLogicNode* pNode) { + if (QUERY_NODE_LOGIC_PLAN_SCAN == nodeType(pNode)) { + return SCAN_TYPE_SYSTEM_TABLE != ((SScanLogicNode*)pNode)->scanType; + } else if (QUERY_NODE_LOGIC_PLAN_PARTITION == nodeType(pNode)) { + return stbNotSystemScan((SLogicNode*)nodesListGetNode(pNode->pChildren, 0)); + } else { + return true; + } +} + +static bool stbHasPartTbname(SNodeList* pPartKeys) { + if (NULL == pPartKeys) { + return false; + } + SNode* pPartKey = NULL; + FOREACH(pPartKey, pPartKeys) { + if (QUERY_NODE_GROUPING_SET == nodeType(pPartKey)) { + pPartKey = nodesListGetNode(((SGroupingSetNode*)pPartKey)->pParameterList, 0); + } + if ((QUERY_NODE_FUNCTION == nodeType(pPartKey) && FUNCTION_TYPE_TBNAME == ((SFunctionNode*)pPartKey)->funcType) || + (QUERY_NODE_COLUMN == nodeType(pPartKey) && COLUMN_TYPE_TBNAME == ((SColumnNode*)pPartKey)->colType)) { + return true; + } + } + return false; +} + +static SNodeList* 
stbSplGetPartKeys(SLogicNode* pNode) { + if (QUERY_NODE_LOGIC_PLAN_SCAN == nodeType(pNode)) { + return ((SScanLogicNode*)pNode)->pGroupTags; + } else if (QUERY_NODE_LOGIC_PLAN_PARTITION == nodeType(pNode)) { + return ((SPartitionLogicNode*)pNode)->pPartitionKeys; + } else { + return NULL; + } +} + +bool isPartTableAgg(SAggLogicNode* pAgg) { + if (1 != LIST_LENGTH(pAgg->node.pChildren)) { + return false; + } + if (NULL != pAgg->pGroupKeys) { + return stbHasPartTbname(pAgg->pGroupKeys) && + stbNotSystemScan((SLogicNode*)nodesListGetNode(pAgg->node.pChildren, 0)); + } + return stbHasPartTbname(stbSplGetPartKeys((SLogicNode*)nodesListGetNode(pAgg->node.pChildren, 0))); +} + +bool isPartTableWinodw(SWindowLogicNode* pWindow) { + return stbHasPartTbname(stbSplGetPartKeys((SLogicNode*)nodesListGetNode(pWindow->node.pChildren, 0))); +} + + diff --git a/source/libs/scheduler/src/scheduler.c b/source/libs/scheduler/src/scheduler.c index e7561ccb7ecf68c5030e12a19c04f3d1a9661cb5..841066a4c9caebc3c69ea86a8fc8533f17289999 100644 --- a/source/libs/scheduler/src/scheduler.c +++ b/source/libs/scheduler/src/scheduler.c @@ -35,7 +35,7 @@ int32_t schedulerInit() { schMgmt.cfg.schPolicy = SCHEDULE_DEFAULT_POLICY; schMgmt.cfg.enableReSchedule = true; - qDebug("schedule init, policy: %d, maxNodeTableNum: %" PRId64", reSchedule:%d", + qDebug("schedule init, policy: %d, maxNodeTableNum: %" PRId64", reSchedule:%d", schMgmt.cfg.schPolicy, schMgmt.cfg.maxNodeTableNum, schMgmt.cfg.enableReSchedule); schMgmt.jobRef = taosOpenRef(schMgmt.cfg.maxJobNum, schFreeJobImpl); @@ -57,11 +57,11 @@ int32_t schedulerInit() { } if (taosGetSystemUUID((char *)&schMgmt.sId, sizeof(schMgmt.sId))) { - qError("generate schdulerId failed, errno:%d", errno); + qError("generate schedulerId failed, errno:%d", errno); SCH_ERR_RET(TSDB_CODE_QRY_SYS_ERROR); } - qInfo("scheduler 0x%" PRIx64 " initizlized, maxJob:%u", schMgmt.sId, schMgmt.cfg.maxJobNum); + qInfo("scheduler 0x%" PRIx64 " initialized, maxJob:%u", 
schMgmt.sId, schMgmt.cfg.maxJobNum); return TSDB_CODE_SUCCESS; } diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index 2164b63cafa6a0083af1e6552f209f5614f2fb5c..32d6dc65d93ab1ddf1a6e383645652b70110f351 100644 --- a/source/libs/stream/inc/streamInt.h +++ b/source/libs/stream/inc/streamInt.h @@ -31,6 +31,12 @@ typedef struct { void* timer; } SStreamGlobalEnv; +typedef struct { + SEpSet epset; + int32_t taskId; + SRpcMsg msg; +} SStreamContinueExecInfo; + extern SStreamGlobalEnv streamEnv; void streamRetryDispatchStreamBlock(SStreamTask* pTask, int64_t waitDuration); @@ -54,6 +60,10 @@ int32_t streamDoDispatchScanHistoryFinishMsg(SStreamTask* pTask, const SStreamSc SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem); +int32_t streamAddEndScanHistoryMsg(SStreamTask* pTask, SRpcHandleInfo* pRpcInfo, SStreamScanHistoryFinishReq* pReq); +int32_t streamNotifyUpstreamContinue(SStreamTask* pTask); +int32_t streamTaskFillHistoryFinished(SStreamTask* pTask); + extern int32_t streamBackendId; extern int32_t streamBackendCfWrapperId; diff --git a/source/libs/stream/src/stream.c b/source/libs/stream/src/stream.c index 07d7cb30407b569f45cd21f55c65fbcdc677db71..f85ade591ca48f5403c4c1e0ee59f87a5760462a 100644 --- a/source/libs/stream/src/stream.c +++ b/source/libs/stream/src/stream.c @@ -65,7 +65,7 @@ static void streamSchedByTimer(void* param, void* tmrId) { SStreamTask* pTask = (void*)param; int8_t status = atomic_load_8(&pTask->triggerStatus); - qDebug("s-task:%s in scheduler timer, trigger status:%d", pTask->id.idStr, status); + qDebug("s-task:%s in scheduler, trigger status:%d, next:%dms", pTask->id.idStr, status, (int32_t)pTask->triggerParam); if (streamTaskShouldStop(&pTask->status) || streamTaskShouldPause(&pTask->status)) { streamMetaReleaseTask(NULL, pTask); @@ -74,23 +74,22 @@ static void streamSchedByTimer(void* param, void* tmrId) { } if (status == TASK_TRIGGER_STATUS__ACTIVE) { - SStreamTrigger* 
trigger = taosAllocateQitem(sizeof(SStreamTrigger), DEF_QITEM, 0); - if (trigger == NULL) { + SStreamTrigger* pTrigger = taosAllocateQitem(sizeof(SStreamTrigger), DEF_QITEM, 0); + if (pTrigger == NULL) { return; } - trigger->type = STREAM_INPUT__GET_RES; - trigger->pBlock = taosMemoryCalloc(1, sizeof(SSDataBlock)); - if (trigger->pBlock == NULL) { - taosFreeQitem(trigger); + pTrigger->type = STREAM_INPUT__GET_RES; + pTrigger->pBlock = taosMemoryCalloc(1, sizeof(SSDataBlock)); + if (pTrigger->pBlock == NULL) { + taosFreeQitem(pTrigger); return; } - trigger->pBlock->info.type = STREAM_GET_ALL; atomic_store_8(&pTask->triggerStatus, TASK_TRIGGER_STATUS__INACTIVE); - - if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)trigger) < 0) { - taosFreeQitem(trigger); + pTrigger->pBlock->info.type = STREAM_GET_ALL; + if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pTrigger) < 0) { + taosFreeQitem(pTrigger); taosTmrReset(streamSchedByTimer, (int32_t)pTask->triggerParam, pTask, streamEnv.timer, &pTask->schedTimer); return; } @@ -102,7 +101,7 @@ static void streamSchedByTimer(void* param, void* tmrId) { } int32_t streamSetupScheduleTrigger(SStreamTask* pTask) { - if (pTask->triggerParam != 0) { + if (pTask->triggerParam != 0 && pTask->info.fillHistory == 0) { int32_t ref = atomic_add_fetch_32(&pTask->refCnt, 1); ASSERT(ref == 2 && pTask->schedTimer == NULL); @@ -216,15 +215,16 @@ int32_t streamTaskEnqueueRetrieve(SStreamTask* pTask, SStreamRetrieveReq* pReq, // todo add log int32_t streamTaskOutputResultBlock(SStreamTask* pTask, SStreamDataBlock* pBlock) { int32_t code = 0; - if (pTask->outputType == TASK_OUTPUT__TABLE) { + int32_t type = pTask->outputInfo.type; + if (type == TASK_OUTPUT__TABLE) { pTask->tbSink.tbSinkFunc(pTask, pTask->tbSink.vnode, 0, pBlock->blocks); destroyStreamDataBlock(pBlock); - } else if (pTask->outputType == TASK_OUTPUT__SMA) { + } else if (type == TASK_OUTPUT__SMA) { pTask->smaSink.smaSink(pTask->smaSink.vnode, pTask->smaSink.smaId, 
pBlock->blocks); destroyStreamDataBlock(pBlock); } else { - ASSERT(pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH); - code = taosWriteQitem(pTask->outputQueue->queue, pBlock); + ASSERT(type == TASK_OUTPUT__FIXED_DISPATCH || type == TASK_OUTPUT__SHUFFLE_DISPATCH); + code = taosWriteQitem(pTask->outputInfo.queue->queue, pBlock); if (code != 0) { // todo failed to add it into the output queue, free it. return code; } @@ -261,20 +261,21 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i // in case of the input queue is full, the code will be TSDB_CODE_SUCCESS, the and pRsp>inputStatus will be set // flag. here we need to retry dispatch this message to downstream task immediately. handle the case the failure // happened too fast. todo handle the shuffle dispatch failure - qError("s-task:%s failed to dispatch msg to task:0x%x, code:%s, retry cnt:%d", pTask->id.idStr, - pRsp->downstreamTaskId, tstrerror(code), ++pTask->msgInfo.retryCount); - int32_t ret = streamDispatchAllBlocks(pTask, pTask->msgInfo.pData); - if (ret != TSDB_CODE_SUCCESS) { - + if (code == TSDB_CODE_STREAM_TASK_NOT_EXIST) { + qError("s-task:%s failed to dispatch msg to task:0x%x, code:%s, no-retry", pTask->id.idStr, + pRsp->downstreamTaskId, tstrerror(code)); + return code; + } else { + qError("s-task:%s failed to dispatch msg to task:0x%x, code:%s, retry cnt:%d", pTask->id.idStr, + pRsp->downstreamTaskId, tstrerror(code), ++pTask->msgInfo.retryCount); + return streamDispatchAllBlocks(pTask, pTask->msgInfo.pData); } - - return TSDB_CODE_SUCCESS; } qDebug("s-task:%s receive dispatch rsp, output status:%d code:%d", pTask->id.idStr, pRsp->inputStatus, code); // there are other dispatch message not response yet - if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { + if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { int32_t leftRsp = atomic_sub_fetch_32(&pTask->shuffleDispatcher.waitingRspCnt, 1); 
qDebug("s-task:%s is shuffle, left waiting rsp %d", pTask->id.idStr, leftRsp); if (leftRsp > 0) { @@ -283,9 +284,9 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i } pTask->msgInfo.retryCount = 0; - ASSERT(pTask->outputStatus == TASK_OUTPUT_STATUS__WAIT); + ASSERT(pTask->outputInfo.status == TASK_OUTPUT_STATUS__WAIT); - qDebug("s-task:%s output status is set to:%d", pTask->id.idStr, pTask->outputStatus); + qDebug("s-task:%s output status is set to:%d", pTask->id.idStr, pTask->outputInfo.status); // the input queue of the (down stream) task that receive the output data is full, // so the TASK_INPUT_STATUS_BLOCKED is rsp @@ -309,7 +310,7 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i } // now ready for next data output - atomic_store_8(&pTask->outputStatus, TASK_OUTPUT_STATUS__NORMAL); + atomic_store_8(&pTask->outputInfo.status, TASK_OUTPUT_STATUS__NORMAL); // otherwise, continue dispatch the first block to down stream task in pipeline streamDispatchStreamBlock(pTask); @@ -323,9 +324,6 @@ int32_t streamProcessRunReq(SStreamTask* pTask) { return -1; } - /*if (pTask->dispatchType == TASK_OUTPUT__FIXED_DISPATCH || pTask->dispatchType == TASK_OUTPUT__SHUFFLE_DISPATCH) {*/ - /*streamDispatchStreamBlock(pTask);*/ - /*}*/ return 0; } @@ -358,6 +356,9 @@ int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem) { return -1; } + int32_t msgLen = px->submit.msgLen; + int64_t ver = px->submit.ver; + int32_t code = taosWriteQitem(pTask->inputQueue->queue, pItem); if (code != TSDB_CODE_SUCCESS) { streamDataSubmitDestroy(px); @@ -365,8 +366,9 @@ int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem) { return code; } + // use the local variable to avoid the pItem be freed by other threads, since it has been put into queue already. 
qDebug("s-task:%s submit enqueue msgLen:%d ver:%" PRId64 ", total in queue:%d, size:%.2fMiB", pTask->id.idStr, - px->submit.msgLen, px->submit.ver, total, size + px->submit.msgLen/1048576.0); + msgLen, ver, total, size + msgLen/1048576.0); } else if (type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__DATA_RETRIEVE || type == STREAM_INPUT__REF_DATA_BLOCK) { if ((pTask->info.taskLevel == TASK_LEVEL__SOURCE) && (tInputQueueIsFull(pTask))) { @@ -377,7 +379,7 @@ int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem) { return -1; } - qDebug("s-task:%s data block enqueue, current(blocks:%d, size:%.2fMiB)", pTask->id.idStr, total, size); + qDebug("s-task:%s blockdata enqueue, total in queue:%d, size:%.2fMiB", pTask->id.idStr, total, size); int32_t code = taosWriteQitem(pTask->inputQueue->queue, pItem); if (code != TSDB_CODE_SUCCESS) { destroyStreamDataBlock((SStreamDataBlock*) pItem); @@ -393,6 +395,7 @@ int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem) { if (type != STREAM_INPUT__GET_RES && type != STREAM_INPUT__CHECKPOINT && pTask->triggerParam != 0) { atomic_val_compare_exchange_8(&pTask->triggerStatus, TASK_TRIGGER_STATUS__INACTIVE, TASK_TRIGGER_STATUS__ACTIVE); + qDebug("s-task:%s new data arrived, active the trigger, trigerStatus:%d", pTask->id.idStr, pTask->triggerStatus); } return 0; @@ -418,4 +421,16 @@ void* streamQueueNextItem(SStreamQueue* pQueue) { } } -void streamTaskInputFail(SStreamTask* pTask) { atomic_store_8(&pTask->inputStatus, TASK_INPUT_STATUS__FAILED); } \ No newline at end of file +void streamTaskInputFail(SStreamTask* pTask) { atomic_store_8(&pTask->inputStatus, TASK_INPUT_STATUS__FAILED); } + +SStreamChildEpInfo * streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId) { + int32_t num = taosArrayGetSize(pTask->pUpstreamEpInfoList); + for(int32_t i = 0; i < num; ++i) { + SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamEpInfoList, i); + if (pInfo->taskId == taskId) { + 
return pInfo; + } + } + + return NULL; +} \ No newline at end of file diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 18ec80e87a2777bb53967c8a23bfa1036586c74b..8534f3b0a11503101bf0a49c1f0a8aaf6811a4c7 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -253,8 +253,8 @@ void streamBackendCleanup(void* arg) { taosThreadMutexDestroy(&pHandle->cfMutex); - taosMemoryFree(pHandle); qDebug("destroy stream backend backend:%p", pHandle); + taosMemoryFree(pHandle); return; } void streamBackendHandleCleanup(void* arg) { @@ -796,8 +796,8 @@ int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t char suffix[64] = {0}; rocksdb_options_t** cfOpts = taosMemoryCalloc(nCf, sizeof(rocksdb_options_t*)); - RocksdbCfParam* params = taosMemoryCalloc(nCf, sizeof(RocksdbCfParam*)); - rocksdb_comparator_t** pCompare = taosMemoryCalloc(nCf, sizeof(rocksdb_comparator_t**)); + RocksdbCfParam* params = taosMemoryCalloc(nCf, sizeof(RocksdbCfParam)); + rocksdb_comparator_t** pCompare = taosMemoryCalloc(nCf, sizeof(rocksdb_comparator_t*)); rocksdb_column_family_handle_t** cfHandle = taosMemoryCalloc(nCf, sizeof(rocksdb_column_family_handle_t*)); for (int i = 0; i < nCf; i++) { @@ -960,7 +960,7 @@ int streamStateOpenBackend(void* backend, SStreamState* pState) { param[i].tableOpt = tableOpt; }; - rocksdb_comparator_t** pCompare = taosMemoryCalloc(cfLen, sizeof(rocksdb_comparator_t**)); + rocksdb_comparator_t** pCompare = taosMemoryCalloc(cfLen, sizeof(rocksdb_comparator_t*)); for (int i = 0; i < cfLen; i++) { SCfInit* cf = &ginitDict[i]; @@ -1066,15 +1066,15 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfNa rocksdb_readoptions_t** readOpt) { int idx = streamStateGetCfIdx(pState, cfName); + rocksdb_readoptions_t* rOpt = rocksdb_readoptions_create(); + *readOpt = rOpt; + SBackendCfWrapper* wrapper = 
pState->pTdbState->pBackendCfWrapper; if (snapshot != NULL) { *snapshot = (rocksdb_snapshot_t*)rocksdb_create_snapshot(wrapper->rocksdb); + rocksdb_readoptions_set_snapshot(rOpt, *snapshot); + rocksdb_readoptions_set_fill_cache(rOpt, 0); } - rocksdb_readoptions_t* rOpt = rocksdb_readoptions_create(); - *readOpt = rOpt; - - rocksdb_readoptions_set_snapshot(rOpt, *snapshot); - rocksdb_readoptions_set_fill_cache(rOpt, 0); return rocksdb_create_iterator_cf(wrapper->rocksdb, rOpt, ((rocksdb_column_family_handle_t**)wrapper->pHandle)[idx]); } @@ -1101,8 +1101,8 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfNa int32_t ttlVLen = ginitDict[i].enValueFunc((char*)value, vLen, 0, &ttlV); \ rocksdb_put_cf(db, opts, pHandle, (const char*)buf, klen, (const char*)ttlV, (size_t)ttlVLen, &err); \ if (err != NULL) { \ - taosMemoryFree(err); \ qError("streamState str: %s failed to write to %s, err: %s", toString, funcname, err); \ + taosMemoryFree(err); \ code = -1; \ } else { \ qTrace("streamState str:%s succ to write to %s, rowValLen:%d, ttlValLen:%d", toString, funcname, vLen, ttlVLen); \ @@ -1263,6 +1263,8 @@ int32_t streamStateGetGroupKVByCur_rocksdb(SStreamStateCur* pCur, SWinKey* pKey, if (pKey->groupId == groupId) { return 0; } + taosMemoryFree((void*)*pVal); + *pVal = NULL; } return -1; } @@ -1440,8 +1442,6 @@ int32_t streamStateSessionPut_rocksdb(SStreamState* pState, const SSessionKey* k int code = 0; SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; STREAM_STATE_PUT_ROCKSDB(pState, "sess", &sKey, value, vLen); - if (code == -1) { - } return code; } int32_t streamStateSessionGet_rocksdb(SStreamState* pState, SSessionKey* key, void** pVal, int32_t* pVLen) { @@ -1459,8 +1459,10 @@ int32_t streamStateSessionGet_rocksdb(SStreamState* pState, SSessionKey* key, vo code = -1; } else { *key = resKey; - *pVal = taosMemoryCalloc(1, *pVLen); - memcpy(*pVal, tmp, *pVLen); + if (pVal != NULL && pVLen != NULL) { + *pVal = 
taosMemoryCalloc(1, *pVLen); + memcpy(*pVal, tmp, *pVLen); + } } } taosMemoryFree(tmp); @@ -1864,7 +1866,6 @@ int32_t streamStateSessionAddIfNotExist_rocksdb(SStreamState* pState, SSessionKe if (sessionRangeKeyCmpr(&searchKey, key) == 0) { memcpy(tmp, *pVal, valSize); taosMemoryFreeClear(*pVal); - streamStateSessionDel_rocksdb(pState, key); goto _end; } taosMemoryFreeClear(*pVal); @@ -1880,7 +1881,6 @@ int32_t streamStateSessionAddIfNotExist_rocksdb(SStreamState* pState, SSessionKe if (code == 0) { if (sessionRangeKeyCmpr(&searchKey, key) == 0) { memcpy(tmp, *pVal, valSize); - streamStateSessionDel_rocksdb(pState, key); goto _end; } } @@ -1938,14 +1938,12 @@ int32_t streamStateStateAddIfNotExist_rocksdb(SStreamState* pState, SSessionKey* if (code == 0) { if (key->win.skey <= tmpKey.win.skey && tmpKey.win.ekey <= key->win.ekey) { memcpy(tmp, *pVal, valSize); - streamStateSessionDel_rocksdb(pState, key); goto _end; } void* stateKey = (char*)(*pVal) + (valSize - keyDataLen); if (fn(pKeyData, stateKey) == true) { memcpy(tmp, *pVal, valSize); - streamStateSessionDel_rocksdb(pState, key); goto _end; } @@ -1961,7 +1959,6 @@ int32_t streamStateStateAddIfNotExist_rocksdb(SStreamState* pState, SSessionKey* void* stateKey = (char*)(*pVal) + (valSize - keyDataLen); if (fn(pKeyData, stateKey) == true) { memcpy(tmp, *pVal, valSize); - streamStateSessionDel_rocksdb(pState, key); goto _end; } } diff --git a/source/libs/stream/src/streamData.c b/source/libs/stream/src/streamData.c index 92f1fc47abb8211d652e666fccd6f8e5245d8c5f..bb4b842787a640435f561d6e75074869da8885af 100644 --- a/source/libs/stream/src/streamData.c +++ b/source/libs/stream/src/streamData.c @@ -166,6 +166,8 @@ int32_t streamMergeSubmit(SStreamMergedSubmit* pMerged, SStreamDataSubmit* pSubm } SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem) { + terrno = 0; + if (dst->type == STREAM_INPUT__DATA_BLOCK && pElem->type == STREAM_INPUT__DATA_BLOCK) { SStreamDataBlock* pBlock = 
(SStreamDataBlock*)dst; SStreamDataBlock* pBlockSrc = (SStreamDataBlock*)pElem; @@ -181,7 +183,10 @@ SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* return dst; } else if (dst->type == STREAM_INPUT__DATA_SUBMIT && pElem->type == STREAM_INPUT__DATA_SUBMIT) { SStreamMergedSubmit* pMerged = streamMergedSubmitNew(); - // todo handle error + if (pMerged == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } streamMergeSubmit(pMerged, (SStreamDataSubmit*)dst); streamMergeSubmit(pMerged, (SStreamDataSubmit*)pElem); @@ -189,6 +194,7 @@ SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* taosFreeQitem(pElem); return (SStreamQueueItem*)pMerged; } else { + qDebug("block type:%d not merged with existed blocks list, type:%d", pElem->type, dst->type); return NULL; } } diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index 9241df2e707f8d74fe2798640c3fcd29f3e9ac71..8334ea1c88b05f6516acf136f4da69424fc7f1c7 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -25,6 +25,12 @@ typedef struct SBlockName { char parTbName[TSDB_TABLE_NAME_LEN]; } SBlockName; +static void initRpcMsg(SRpcMsg* pMsg, int32_t msgType, void* pCont, int32_t contLen) { + pMsg->msgType = msgType; + pMsg->pCont = pCont; + pMsg->contLen = contLen; +} + static int32_t tEncodeStreamDispatchReq(SEncoder* pEncoder, const SStreamDispatchReq* pReq) { if (tStartEncode(pEncoder) < 0) return -1; if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1; @@ -311,13 +317,12 @@ int32_t streamDoDispatchScanHistoryFinishMsg(SStreamTask* pTask, const SStreamSc msg.contLen = tlen + sizeof(SMsgHead); msg.pCont = buf; msg.msgType = TDMT_STREAM_SCAN_HISTORY_FINISH; - msg.info.noResp = 1; tmsgSendReq(pEpSet, &msg); const char* pStatus = streamGetTaskStatusStr(pTask->status.taskStatus); - qDebug("s-task:%s status:%s dispatch scan-history-data finish msg to taskId:0x%x 
(vgId:%d)", pTask->id.idStr, pStatus, - pReq->taskId, vgId); + qDebug("s-task:%s status:%s dispatch scan-history finish msg to taskId:0x%x (vgId:%d)", pTask->id.idStr, pStatus, + pReq->downstreamTaskId, vgId); return 0; } @@ -437,7 +442,7 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat int32_t numOfBlocks = taosArrayGetSize(pData->blocks); ASSERT(numOfBlocks != 0); - if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { + if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) { SStreamDispatchReq req = {0}; int32_t downstreamTaskId = pTask->fixedEpDispatcher.taskId; @@ -467,7 +472,7 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat taosArrayDestroyP(req.data, taosMemoryFree); taosArrayDestroy(req.dataLen); return code; - } else if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { + } else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { int32_t rspCnt = atomic_load_32(&pTask->shuffleDispatcher.waitingRspCnt); ASSERT(rspCnt == 0); @@ -545,7 +550,7 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat static void doRetryDispatchData(void* param, void* tmrId) { SStreamTask* pTask = param; - ASSERT(pTask->outputStatus == TASK_OUTPUT_STATUS__WAIT); + ASSERT(pTask->outputInfo.status == TASK_OUTPUT_STATUS__WAIT); int32_t code = streamDispatchAllBlocks(pTask, pTask->msgInfo.pData); if (code != TSDB_CODE_SUCCESS) { @@ -561,29 +566,29 @@ void streamRetryDispatchStreamBlock(SStreamTask* pTask, int64_t waitDuration) { } int32_t streamDispatchStreamBlock(SStreamTask* pTask) { - ASSERT((pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH)); + STaskOutputInfo* pInfo = &pTask->outputInfo; + ASSERT((pInfo->type == TASK_OUTPUT__FIXED_DISPATCH || pInfo->type == TASK_OUTPUT__SHUFFLE_DISPATCH)); - int32_t numOfElems = taosQueueItemSize(pTask->outputQueue->queue); + int32_t numOfElems = 
taosQueueItemSize(pInfo->queue->queue); if (numOfElems > 0) { qDebug("s-task:%s try to dispatch intermediate result block to downstream, elem in outputQ:%d", pTask->id.idStr, numOfElems); } // to make sure only one dispatch is running - int8_t old = - atomic_val_compare_exchange_8(&pTask->outputStatus, TASK_OUTPUT_STATUS__NORMAL, TASK_OUTPUT_STATUS__WAIT); + int8_t old = atomic_val_compare_exchange_8(&pInfo->status, TASK_OUTPUT_STATUS__NORMAL, TASK_OUTPUT_STATUS__WAIT); if (old != TASK_OUTPUT_STATUS__NORMAL) { qDebug("s-task:%s wait for dispatch rsp, not dispatch now, output status:%d", pTask->id.idStr, old); return 0; } ASSERT(pTask->msgInfo.pData == NULL); - qDebug("s-task:%s start to dispatch msg, set output status:%d", pTask->id.idStr, pTask->outputStatus); + qDebug("s-task:%s start to dispatch msg, set output status:%d", pTask->id.idStr, pInfo->status); - SStreamDataBlock* pBlock = streamQueueNextItem(pTask->outputQueue); + SStreamDataBlock* pBlock = streamQueueNextItem(pInfo->queue); if (pBlock == NULL) { - atomic_store_8(&pTask->outputStatus, TASK_OUTPUT_STATUS__NORMAL); - qDebug("s-task:%s not dispatch since no elems in outputQ, output status:%d", pTask->id.idStr, pTask->outputStatus); + atomic_store_8(&pInfo->status, TASK_OUTPUT_STATUS__NORMAL); + qDebug("s-task:%s not dispatch since no elems in outputQ, output status:%d", pTask->id.idStr, pInfo->status); return 0; } @@ -599,19 +604,19 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { } qDebug("s-task:%s failed to dispatch msg to downstream, code:%s, output status:%d, retry cnt:%d", pTask->id.idStr, - tstrerror(terrno), pTask->outputStatus, retryCount); + tstrerror(terrno), pInfo->status, retryCount); // todo deal with only partially success dispatch case atomic_store_32(&pTask->shuffleDispatcher.waitingRspCnt, 0); - if (terrno == TSDB_CODE_APP_IS_STOPPING) { // in case of this error, do not retry anymore + if (terrno == TSDB_CODE_APP_IS_STOPPING) { // in case of this error, do not retry anymore 
destroyStreamDataBlock(pTask->msgInfo.pData); pTask->msgInfo.pData = NULL; return code; } - if (++retryCount > MAX_CONTINUE_RETRY_COUNT) { // add to timer to retry - qDebug("s-task:%s failed to dispatch msg to downstream for %d times, code:%s, add timer to retry in %dms", pTask->id.idStr, - retryCount, tstrerror(terrno), DISPATCH_RETRY_INTERVAL_MS); + if (++retryCount > MAX_CONTINUE_RETRY_COUNT) { // add to timer to retry + qDebug("s-task:%s failed to dispatch msg to downstream for %d times, code:%s, add timer to retry in %dms", + pTask->id.idStr, retryCount, tstrerror(terrno), DISPATCH_RETRY_INTERVAL_MS); streamRetryDispatchStreamBlock(pTask, DISPATCH_RETRY_INTERVAL_MS); break; } @@ -620,3 +625,93 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { // this block can not be deleted until it has been sent to downstream task successfully. return TSDB_CODE_SUCCESS; } + +int32_t tEncodeCompleteHistoryDataMsg(SEncoder* pEncoder, const SStreamCompleteHistoryMsg* pReq) { + if (tStartEncode(pEncoder) < 0) return -1; + if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->downstreamId) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->downstreamNode) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->upstreamTaskId) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->upstreamNodeId) < 0) return -1; + tEndEncode(pEncoder); + return pEncoder->pos; +} + +int32_t tDecodeCompleteHistoryDataMsg(SDecoder* pDecoder, SStreamCompleteHistoryMsg* pRsp) { + if (tStartDecode(pDecoder) < 0) return -1; + if (tDecodeI64(pDecoder, &pRsp->streamId) < 0) return -1; + if (tDecodeI32(pDecoder, &pRsp->downstreamId) < 0) return -1; + if (tDecodeI32(pDecoder, &pRsp->downstreamNode) < 0) return -1; + if (tDecodeI32(pDecoder, &pRsp->upstreamTaskId) < 0) return -1; + if (tDecodeI32(pDecoder, &pRsp->upstreamNodeId) < 0) return -1; + tEndDecode(pDecoder); + return 0; +} + +int32_t streamAddEndScanHistoryMsg(SStreamTask* pTask, SRpcHandleInfo* pRpcInfo, 
SStreamScanHistoryFinishReq* pReq) { + int32_t len = 0; + int32_t code = 0; + SEncoder encoder; + + SStreamCompleteHistoryMsg msg = { + .streamId = pReq->streamId, + .upstreamTaskId = pReq->upstreamTaskId, + .upstreamNodeId = pReq->upstreamNodeId, + .downstreamId = pReq->downstreamTaskId, + .downstreamNode = pTask->pMeta->vgId, + }; + + tEncodeSize(tEncodeCompleteHistoryDataMsg, &msg, len, code); + if (code < 0) { + return code; + } + + void* pBuf = rpcMallocCont(sizeof(SMsgHead) + len); + if (pBuf == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + ((SMsgHead*)pBuf)->vgId = htonl(pReq->upstreamNodeId); + + void* abuf = POINTER_SHIFT(pBuf, sizeof(SMsgHead)); + + tEncoderInit(&encoder, (uint8_t*)abuf, len); + tEncodeCompleteHistoryDataMsg(&encoder, &msg); + tEncoderClear(&encoder); + + SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, pReq->upstreamTaskId); + + SStreamContinueExecInfo info = {.taskId = pReq->upstreamTaskId, .epset = pInfo->epSet}; + initRpcMsg(&info.msg, 0, pBuf, sizeof(SMsgHead) + len); + info.msg.info = *pRpcInfo; + + taosThreadMutexLock(&pTask->lock); + if (pTask->pRspMsgList == NULL) { + pTask->pRspMsgList = taosArrayInit(4, sizeof(SStreamContinueExecInfo)); + } + taosArrayPush(pTask->pRspMsgList, &info); + taosThreadMutexUnlock(&pTask->lock); + + int32_t num = taosArrayGetSize(pTask->pRspMsgList); + qDebug("s-task:%s add scan history finish rsp msg for task:0x%x, total:%d", pTask->id.idStr, pReq->upstreamTaskId, + num); + return TSDB_CODE_SUCCESS; +} + +int32_t streamNotifyUpstreamContinue(SStreamTask* pTask) { + ASSERT(pTask->info.taskLevel == TASK_LEVEL__AGG || pTask->info.taskLevel == TASK_LEVEL__SINK); + + int32_t num = taosArrayGetSize(pTask->pRspMsgList); + for (int32_t i = 0; i < num; ++i) { + SStreamContinueExecInfo* pInfo = taosArrayGet(pTask->pRspMsgList, i); + tmsgSendRsp(&pInfo->msg); + + qDebug("s-task:%s level:%d notify upstream:0x%x to continue process data from WAL", pTask->id.idStr, pTask->info.taskLevel, + 
pInfo->taskId); + } + + taosArrayClear(pTask->pRspMsgList); + qDebug("s-task:%s level:%d checkpoint ready msg sent to all %d upstreams", pTask->id.idStr, pTask->info.taskLevel, + num); + return 0; +} diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index c8aa6f56157965108d3f554e493d287ba51dd232..34370ebce9a8962b6a5c81fd3de112753d779471 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -29,7 +29,7 @@ bool streamTaskShouldStop(const SStreamStatus* pStatus) { bool streamTaskShouldPause(const SStreamStatus* pStatus) { int32_t status = atomic_load_8((int8_t*)&pStatus->taskStatus); - return (status == TASK_STATUS__PAUSE); + return (status == TASK_STATUS__PAUSE || status == TASK_STATUS__HALT); } static int32_t doDumpResult(SStreamTask* pTask, SStreamQueueItem* pItem, SArray* pRes, int32_t size, int64_t* totalSize, @@ -162,23 +162,28 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i return code; } -int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) { - int32_t code = 0; - +int32_t streamScanExec(SStreamTask* pTask, int32_t batchSize) { ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE); - void* exec = pTask->exec.pExecutor; + int32_t code = TSDB_CODE_SUCCESS; + void* exec = pTask->exec.pExecutor; + bool finished = false; qSetStreamOpOpen(exec); - bool finished = false; - while (1) { + while (!finished) { + if (streamTaskShouldPause(&pTask->status)) { + double el = (taosGetTimestampMs() - pTask->tsInfo.step1Start) / 1000.0; + qDebug("s-task:%s paused from the scan-history task, elapsed time:%.2fsec", pTask->id.idStr, el); + break; + } + SArray* pRes = taosArrayInit(0, sizeof(SSDataBlock)); if (pRes == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } - int32_t batchCnt = 0; + int32_t numOfBlocks = 0; while (1) { if (streamTaskShouldStop(&pTask->status)) { taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); @@ -187,34 +192,15 @@ int32_t 
streamScanExec(SStreamTask* pTask, int32_t batchSz) { SSDataBlock* output = NULL; uint64_t ts = 0; - if (qExecTask(exec, &output, &ts) < 0) { + code = qExecTask(exec, &output, &ts); + if (code != TSDB_CODE_TSC_QUERY_KILLED && code != TSDB_CODE_SUCCESS) { + qError("%s scan-history data error occurred code:%s, continue scan", pTask->id.idStr, tstrerror(code)); continue; } + // the generated results before fill-history task been paused, should be dispatched to sink node if (output == NULL) { - if (qStreamRecoverScanFinished(exec)) { - finished = true; - } else { - qSetStreamOpOpen(exec); - if (streamTaskShouldPause(&pTask->status)) { - SStreamDataBlock* qRes = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, 0); - if (qRes == NULL) { - taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - qRes->type = STREAM_INPUT__DATA_BLOCK; - qRes->blocks = pRes; - code = streamTaskOutputResultBlock(pTask, qRes); - if (code == TSDB_CODE_UTIL_QUEUE_OUT_OF_MEMORY) { - taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); - taosFreeQitem(qRes); - return code; - } - return 0; - } - } + finished = qStreamRecoverScanFinished(exec); break; } @@ -223,86 +209,36 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) { block.info.childId = pTask->info.selfChildId; taosArrayPush(pRes, &block); - batchCnt++; - - qDebug("s-task:%s scan exec numOfBlocks:%d, limit:%d", pTask->id.idStr, batchCnt, batchSz); - if (batchCnt >= batchSz) { + if ((++numOfBlocks) >= batchSize) { + qDebug("s-task:%s scan exec numOfBlocks:%d, output limit:%d reached", pTask->id.idStr, numOfBlocks, batchSize); break; } } - if (taosArrayGetSize(pRes) == 0) { - taosArrayDestroy(pRes); - - if (finished) { - qDebug("s-task:%s finish recover exec task ", pTask->id.idStr); - break; - } else { - qDebug("s-task:%s continue recover exec task ", pTask->id.idStr); - continue; + if (taosArrayGetSize(pRes) > 0) { + SStreamDataBlock* qRes = 
taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, 0); + if (qRes == NULL) { + taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; } - } - SStreamDataBlock* qRes = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, 0); - if (qRes == NULL) { - taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } + qRes->type = STREAM_INPUT__DATA_BLOCK; + qRes->blocks = pRes; - qRes->type = STREAM_INPUT__DATA_BLOCK; - qRes->blocks = pRes; - code = streamTaskOutputResultBlock(pTask, qRes); - if (code == TSDB_CODE_UTIL_QUEUE_OUT_OF_MEMORY) { - taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); - taosFreeQitem(qRes); - return code; - } - - if (finished) { - break; - } - } - return 0; -} - -#if 0 -int32_t streamBatchExec(SStreamTask* pTask, int32_t batchLimit) { - // fetch all queue item, merge according to batchLimit - int32_t numOfItems = taosReadAllQitems(pTask->inputQueue1, pTask->inputQall); - if (numOfItems == 0) { - qDebug("task: %d, stream task exec over, queue empty", pTask->id.taskId); - return 0; - } - SStreamQueueItem* pMerged = NULL; - SStreamQueueItem* pItem = NULL; - taosGetQitem(pTask->inputQall, (void**)&pItem); - if (pItem == NULL) { - if (pMerged != NULL) { - // process merged item + code = streamTaskOutputResultBlock(pTask, qRes); + if (code == TSDB_CODE_UTIL_QUEUE_OUT_OF_MEMORY) { + taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); + taosFreeQitem(qRes); + return code; + } } else { - return 0; + taosArrayDestroy(pRes); } } - // if drop - if (pItem->type == STREAM_INPUT__DESTROY) { - // set status drop - return -1; - } - - if (pTask->info.taskLevel == TASK_LEVEL__SINK) { - ASSERT(((SStreamQueueItem*)pItem)->type == STREAM_INPUT__DATA_BLOCK); - streamTaskOutputResultBlock(pTask, (SStreamDataBlock*)pItem); - } - - // exec impl - - // output - // try dispatch return 0; } -#endif int32_t updateCheckPointInfo(SStreamTask* pTask) { int64_t ckId = 0; @@ -345,39 
+281,45 @@ static void waitForTaskIdle(SStreamTask* pTask, SStreamTask* pStreamTask) { double el = (taosGetTimestampMs() - st) / 1000.0; if (el > 0) { - qDebug("s-task:%s wait for stream task:%s for %.2fs to handle all data in inputQ", pTask->id.idStr, + qDebug("s-task:%s wait for stream task:%s for %.2fs to be idle", pTask->id.idStr, pStreamTask->id.idStr, el); } } -static int32_t streamTransferStateToStreamTask(SStreamTask* pTask) { - SStreamTask* pStreamTask = streamMetaAcquireTask(pTask->pMeta, pTask->streamTaskId.taskId); +static int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask) { + SStreamMeta* pMeta = pTask->pMeta; + + SStreamTask* pStreamTask = streamMetaAcquireTask(pMeta, pTask->streamTaskId.taskId); if (pStreamTask == NULL) { - qError("s-task:%s failed to find related stream task:0x%x, it may have been destroyed or closed", - pTask->id.idStr, pTask->streamTaskId.taskId); + // todo: destroy the fill-history task here + qError("s-task:%s failed to find related stream task:0x%x, it may have been destroyed or closed", pTask->id.idStr, + pTask->streamTaskId.taskId); return TSDB_CODE_STREAM_TASK_NOT_EXIST; } else { qDebug("s-task:%s fill-history task end, update related stream task:%s info, transfer exec state", pTask->id.idStr, pStreamTask->id.idStr); } - ASSERT(pStreamTask != NULL && pStreamTask->historyTaskId.taskId == pTask->id.taskId); + ASSERT(pStreamTask->historyTaskId.taskId == pTask->id.taskId && pTask->status.transferState == true); + STimeWindow* pTimeWindow = &pStreamTask->dataRange.window; + // todo. the dropping status should be append to the status after the halt completed. // It must be halted for a source stream task, since when the related scan-history-data task start scan the history - // for the step 2. For a agg task + // for the step 2. 
+ int8_t status = pStreamTask->status.taskStatus; if (pStreamTask->info.taskLevel == TASK_LEVEL__SOURCE) { - ASSERT(pStreamTask->status.taskStatus == TASK_STATUS__HALT); + ASSERT(status == TASK_STATUS__HALT || status == TASK_STATUS__DROPPING); } else { - ASSERT(pStreamTask->status.taskStatus == TASK_STATUS__NORMAL); + ASSERT(status == TASK_STATUS__SCAN_HISTORY); pStreamTask->status.taskStatus = TASK_STATUS__HALT; - qDebug("s-task:%s status: halt by related fill history task:%s", pStreamTask->id.idStr, pTask->id.idStr); + qDebug("s-task:%s halt by related fill-history task:%s", pStreamTask->id.idStr, pTask->id.idStr); } - // wait for the stream task to be idle + // wait for the stream task to handle all in the inputQ, and to be idle waitForTaskIdle(pTask, pStreamTask); - // In case of sink tasks, no need to be halted for them. + // In case of sink tasks, no need to halt them. // In case of source tasks and agg tasks, we should HALT them, and wait for them to be idle. And then, it's safe to // start the task state transfer procedure. // When a task is idle with halt status, all data in inputQ are consumed. @@ -391,21 +333,63 @@ static int32_t streamTransferStateToStreamTask(SStreamTask* pTask) { qDebug("s-task:%s no need to update time window for non-source task", pStreamTask->id.idStr); } - // expand the query time window for stream scanner + // 1. expand the query time window for stream task of WAL scanner pTimeWindow->skey = INT64_MIN; - qResetStreamInfoTimeWindow(pStreamTask->exec.pExecutor); + qStreamInfoResetTimewindowFilter(pStreamTask->exec.pExecutor); - // transfer the ownership of executor state + // 2. transfer the ownership of executor state streamTaskReleaseState(pTask); streamTaskReloadState(pStreamTask); - streamSetStatusNormal(pStreamTask); + // 3. clear the link between fill-history task and stream task info + pStreamTask->historyTaskId.taskId = 0; + + // 4. resume the state of stream task, after this function, the stream task will run immidately. 
But it can not be + // pause, since the pause allowed attribute is not set yet. + streamTaskResumeFromHalt(pStreamTask); + + qDebug("s-task:%s fill-history task set status to be dropping, save the state into disk", pTask->id.idStr); + int32_t taskId = pTask->id.taskId; + + // 5. free it and remove fill-history task from disk meta-store + streamMetaUnregisterTask(pMeta, taskId); + + // 6. save to disk + taosWLockLatch(&pMeta->lock); + streamMetaSaveTask(pMeta, pStreamTask); + if (streamMetaCommit(pMeta) < 0) { + // persist to disk + } + taosWUnLockLatch(&pMeta->lock); + + // 7. pause allowed. + streamTaskEnablePause(pStreamTask); streamSchedExec(pStreamTask); - streamMetaReleaseTask(pTask->pMeta, pStreamTask); + streamMetaReleaseTask(pMeta, pStreamTask); return TSDB_CODE_SUCCESS; } +static int32_t streamTransferStateToStreamTask(SStreamTask* pTask) { + int32_t code = TSDB_CODE_SUCCESS; + if (!pTask->status.transferState) { + return code; + } + + int32_t level = pTask->info.taskLevel; + if (level == TASK_LEVEL__SOURCE) { + streamTaskFillHistoryFinished(pTask); + streamTaskEndScanWAL(pTask); + } else if (level == TASK_LEVEL__AGG) { // do transfer task operator states. + code = streamDoTransferStateToStreamTask(pTask); + if (code != TSDB_CODE_SUCCESS) { // todo handle this + return code; + } + } + + return code; +} + static int32_t extractMsgFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInput, int32_t* numOfBlocks, const char* id) { int32_t retryTimes = 0; @@ -443,7 +427,12 @@ static int32_t extractMsgFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInpu // todo we need to sort the data block, instead of just appending into the array list. 
void* newRet = streamMergeQueueItem(*pInput, qItem); if (newRet == NULL) { - qError("s-task:%s failed to merge blocks from inputQ, numOfBlocks:%d", id, *numOfBlocks); + if (terrno == 0) { + qDebug("s-task:%s failed to merge blocks from inputQ, numOfBlocks:%d", id, *numOfBlocks); + } else { + qDebug("s-task:%s failed to merge blocks from inputQ, numOfBlocks:%d, code:%s", id, *numOfBlocks, + tstrerror(terrno)); + } streamQueueProcessFail(pTask->inputQueue); return TSDB_CODE_SUCCESS; } @@ -471,6 +460,10 @@ int32_t streamExecForAll(SStreamTask* pTask) { while (1) { int32_t batchSize = 0; SStreamQueueItem* pInput = NULL; + if (streamTaskShouldStop(&pTask->status)) { + qDebug("s-task:%s stream task stopped, abort", id); + break; + } // merge multiple input data if possible in the input queue. qDebug("s-task:%s start to extract data block from inputQ", id); @@ -478,14 +471,6 @@ int32_t streamExecForAll(SStreamTask* pTask) { /*int32_t code = */extractMsgFromInputQ(pTask, &pInput, &batchSize, id); if (pInput == NULL) { ASSERT(batchSize == 0); - if (pTask->info.fillHistory && pTask->status.transferState) { - int32_t code = streamTransferStateToStreamTask(pTask); - pTask->status.transferState = false; // reset this value, to avoid transfer state again - if (code != TSDB_CODE_SUCCESS) { // todo handle this - return 0; - } - } - break; } @@ -550,22 +535,28 @@ int32_t streamExecForAll(SStreamTask* pTask) { } bool streamTaskIsIdle(const SStreamTask* pTask) { - int32_t numOfItems = taosQueueItemSize(pTask->inputQueue->queue); - if (numOfItems > 0) { - return false; - } + return (pTask->status.schedStatus == TASK_SCHED_STATUS__INACTIVE); +} - numOfItems = taosQallItemSize(pTask->inputQueue->qall); - if (numOfItems > 0) { - return false; +int32_t streamTaskEndScanWAL(SStreamTask* pTask) { + const char* id = pTask->id.idStr; + double el = (taosGetTimestampMs() - pTask->tsInfo.step2Start) / 1000.0; + qDebug("s-task:%s scan-history from WAL stage(step 2) ended, elapsed time:%.2fs", id, 
el); + + // 1. notify all downstream tasks to transfer executor state after handle all history blocks. + int32_t code = streamDispatchTransferStateMsg(pTask); + if (code != TSDB_CODE_SUCCESS) { + // todo handle error } - // blocked by downstream task - if (pTask->outputStatus == TASK_OUTPUT_STATUS__BLOCKED) { - return false; + // 2. do transfer stream task operator states. + pTask->status.transferState = true; + code = streamDoTransferStateToStreamTask(pTask); + if (code != TSDB_CODE_SUCCESS) { // todo handle error + return code; } - return (pTask->status.schedStatus == TASK_SCHED_STATUS__INACTIVE); + return TSDB_CODE_SUCCESS; } int32_t streamTryExec(SStreamTask* pTask) { @@ -573,6 +564,8 @@ int32_t streamTryExec(SStreamTask* pTask) { int8_t schedStatus = atomic_val_compare_exchange_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__WAITING, TASK_SCHED_STATUS__ACTIVE); + const char* id = pTask->id.idStr; + if (schedStatus == TASK_SCHED_STATUS__WAITING) { int32_t code = streamExecForAll(pTask); if (code < 0) { // todo this status shoudl be removed @@ -581,14 +574,31 @@ int32_t streamTryExec(SStreamTask* pTask) { } // todo the task should be commit here - atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); - qDebug("s-task:%s exec completed, status:%s, sched-status:%d", pTask->id.idStr, streamGetTaskStatusStr(pTask->status.taskStatus), - pTask->status.schedStatus); + if (taosQueueEmpty(pTask->inputQueue->queue)) { + // fill-history WAL scan has completed + if (pTask->status.transferState) { + code = streamTransferStateToStreamTask(pTask); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + streamSchedExec(pTask); + } else { + atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); + qDebug("s-task:%s exec completed, status:%s, sched-status:%d", id, streamGetTaskStatusStr(pTask->status.taskStatus), + pTask->status.schedStatus); + } + } else { + atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); + 
qDebug("s-task:%s exec completed, status:%s, sched-status:%d", id, streamGetTaskStatusStr(pTask->status.taskStatus), + pTask->status.schedStatus); - if (!taosQueueEmpty(pTask->inputQueue->queue) && (!streamTaskShouldStop(&pTask->status)) && - (!streamTaskShouldPause(&pTask->status))) { - streamSchedExec(pTask); + if ((!streamTaskShouldStop(&pTask->status)) && (!streamTaskShouldPause(&pTask->status))) { + streamSchedExec(pTask); + } } + } else { + qDebug("s-task:%s already started to exec by other thread, status:%s, sched-status:%d", id, + streamGetTaskStatusStr(pTask->status.taskStatus), pTask->status.schedStatus); } return 0; diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index a2b5d0e396bb6452843db4f290f91bbcdda73aa3..ae077388685c0c732d956a466016ea2660db63c2 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -217,6 +217,7 @@ int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) { tEncoderClear(&encoder); if (tdbTbUpsert(pMeta->pTaskDb, &pTask->id.taskId, sizeof(int32_t), buf, len, pMeta->txn) < 0) { + qError("s-task:%s save to disk failed, code:%s", pTask->id.idStr, tstrerror(terrno)); return -1; } @@ -224,8 +225,21 @@ int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) { return 0; } +int32_t streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) { + int32_t code = tdbTbDelete(pMeta->pTaskDb, &taskId, sizeof(taskId), pMeta->txn); + if (code != 0) { + qError("vgId:%d failed to remove task:0x%x from metastore, code:%s", pMeta->vgId, taskId, tstrerror(terrno)); + } else { + qDebug("vgId:%d remove task:0x%x from metastore", pMeta->vgId, taskId); + } + + return code; +} + // add to the ready tasks hash map, not the restored tasks hash map -int32_t streamMetaAddDeployedTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask) { +int32_t streamMetaRegisterTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask, bool* pAdded) { + *pAdded = false; + void* p = 
taosHashGet(pMeta->pTasks, &pTask->id.taskId, sizeof(pTask->id.taskId)); if (p == NULL) { if (pMeta->expandFunc(pMeta->ahandle, pTask, ver) < 0) { @@ -233,6 +247,8 @@ int32_t streamMetaAddDeployedTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* return -1; } + taosArrayPush(pMeta->pTaskList, &pTask->id.taskId); + if (streamMetaSaveTask(pMeta, pTask) < 0) { tFreeStreamTask(pTask); return -1; @@ -242,19 +258,18 @@ int32_t streamMetaAddDeployedTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* tFreeStreamTask(pTask); return -1; } - taosArrayPush(pMeta->pTaskList, &pTask->id.taskId); } else { return 0; } taosHashPut(pMeta->pTasks, &pTask->id.taskId, sizeof(pTask->id.taskId), &pTask, POINTER_BYTES); + *pAdded = true; return 0; } -int32_t streamMetaGetNumOfTasks(const SStreamMeta* pMeta) { +int32_t streamMetaGetNumOfTasks(SStreamMeta* pMeta) { size_t size = taosHashGetSize(pMeta->pTasks); ASSERT(taosArrayGetSize(pMeta->pTaskList) == taosHashGetSize(pMeta->pTasks)); - return (int32_t)size; } @@ -266,7 +281,7 @@ SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int32_t taskId) { if (!streamTaskShouldStop(&(*ppTask)->status)) { int32_t ref = atomic_add_fetch_32(&(*ppTask)->refCnt, 1); taosRUnLockLatch(&pMeta->lock); - qDebug("s-task:%s acquire task, ref:%d", (*ppTask)->id.idStr, ref); + qTrace("s-task:%s acquire task, ref:%d", (*ppTask)->id.idStr, ref); return *ppTask; } } @@ -278,9 +293,10 @@ SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int32_t taskId) { void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask) { int32_t ref = atomic_sub_fetch_32(&pTask->refCnt, 1); if (ref > 0) { - qDebug("s-task:%s release task, ref:%d", pTask->id.idStr, ref); + qTrace("s-task:%s release task, ref:%d", pTask->id.idStr, ref); } else if (ref == 0) { ASSERT(streamTaskShouldStop(&pTask->status)); + qTrace("s-task:%s all refs are gone, free it", pTask->id.idStr); tFreeStreamTask(pTask); } else if (ref < 0) { qError("task ref is invalid, ref:%d, %s", ref, 
pTask->id.idStr); @@ -297,7 +313,7 @@ static void doRemoveIdFromList(SStreamMeta* pMeta, int32_t num, int32_t taskId) } } -void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) { +int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int32_t taskId) { SStreamTask* pTask = NULL; // pre-delete operation @@ -309,7 +325,7 @@ void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) { } else { qDebug("vgId:%d failed to find the task:0x%x, it may be dropped already", pMeta->vgId, taskId); taosWUnLockLatch(&pMeta->lock); - return; + return 0; } taosWUnLockLatch(&pMeta->lock); @@ -339,27 +355,26 @@ void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) { ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t)); if (ppTask) { taosHashRemove(pMeta->pTasks, &taskId, sizeof(int32_t)); - tdbTbDelete(pMeta->pTaskDb, &taskId, sizeof(int32_t), pMeta->txn); - atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__DROPPING); + ASSERT(pTask->status.timerActive == 0); int32_t num = taosArrayGetSize(pMeta->pTaskList); - qDebug("s-task:%s set the drop task flag, remain running s-task:%d", pTask->id.idStr, num - 1); doRemoveIdFromList(pMeta, num, pTask->id.taskId); // remove the ref by timer if (pTask->triggerParam != 0) { taosTmrStop(pTask->schedTimer); - streamMetaReleaseTask(pMeta, pTask); } + streamMetaRemoveTask(pMeta, taskId); streamMetaReleaseTask(pMeta, pTask); } else { qDebug("vgId:%d failed to find the task:0x%x, it may have been dropped already", pMeta->vgId, taskId); } taosWUnLockLatch(&pMeta->lock); + return 0; } int32_t streamMetaBegin(SStreamMeta* pMeta) { @@ -404,7 +419,9 @@ int32_t streamMetaAbort(SStreamMeta* pMeta) { int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { TBC* pCur = NULL; + if (tdbTbcOpen(pMeta->pTaskDb, &pCur, NULL) < 0) { + qError("vgId:%d failed to open stream meta, code:%s", pMeta->vgId, tstrerror(terrno)); return -1; } @@ -413,6 +430,7 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { void* 
pVal = NULL; int32_t vLen = 0; SDecoder decoder; + SArray* pRecycleList = taosArrayInit(4, sizeof(int32_t)); tdbTbcMoveToFirst(pCur); @@ -422,6 +440,7 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { tdbFree(pKey); tdbFree(pVal); tdbTbcClose(pCur); + taosArrayDestroy(pRecycleList); return -1; } @@ -429,16 +448,29 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { tDecodeStreamTask(&decoder, pTask); tDecoderClear(&decoder); - // remove duplicate + if (pTask->status.taskStatus == TASK_STATUS__DROPPING) { + int32_t taskId = pTask->id.taskId; + tFreeStreamTask(pTask); + + taosArrayPush(pRecycleList, &taskId); + + int32_t total = taosArrayGetSize(pRecycleList); + qDebug("s-task:0x%x is already dropped, add into recycle list, total:%d", taskId, total); + continue; + } + + // do duplicate task check. void* p = taosHashGet(pMeta->pTasks, &pTask->id.taskId, sizeof(pTask->id.taskId)); if (p == NULL) { if (pMeta->expandFunc(pMeta->ahandle, pTask, pTask->chkInfo.version) < 0) { tdbFree(pKey); tdbFree(pVal); tdbTbcClose(pCur); - taosMemoryFree(pTask); + tFreeStreamTask(pTask); + taosArrayDestroy(pRecycleList); return -1; } + taosArrayPush(pMeta->pTaskList, &pTask->id.taskId); } else { tdbFree(pKey); @@ -452,7 +484,8 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { tdbFree(pKey); tdbFree(pVal); tdbTbcClose(pCur); - taosMemoryFree(pTask); + tFreeStreamTask(pTask); + taosArrayDestroy(pRecycleList); return -1; } @@ -462,8 +495,18 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { tdbFree(pKey); tdbFree(pVal); if (tdbTbcClose(pCur) < 0) { + taosArrayDestroy(pRecycleList); return -1; } + if (taosArrayGetSize(pRecycleList) > 0) { + for(int32_t i = 0; i < taosArrayGetSize(pRecycleList); ++i) { + int32_t taskId = *(int32_t*) taosArrayGet(pRecycleList, i); + streamMetaRemoveTask(pMeta, taskId); + } + } + + qDebug("vgId:%d load %d task from disk", pMeta->vgId, (int32_t) taosArrayGetSize(pMeta->pTaskList)); + taosArrayDestroy(pRecycleList); 
return 0; } diff --git a/source/libs/stream/src/streamRecover.c b/source/libs/stream/src/streamRecover.c index f51efb23d1e85f96f55fd62e92d5e96b9d670fc2..bd2d67e14ae3121ef2f4bb1e63d22c83da6e5cc1 100644 --- a/source/libs/stream/src/streamRecover.c +++ b/source/libs/stream/src/streamRecover.c @@ -17,11 +17,30 @@ #include "ttimer.h" #include "wal.h" -int32_t streamStartRecoverTask(SStreamTask* pTask, int8_t igUntreated) { +typedef struct SStreamTaskRetryInfo { + SStreamMeta* pMeta; + int32_t taskId; +} SStreamTaskRetryInfo; + +static int32_t streamSetParamForScanHistory(SStreamTask* pTask); +static void launchFillHistoryTask(SStreamTask* pTask); +static void streamTaskSetRangeStreamCalc(SStreamTask* pTask); +static int32_t initScanHistoryReq(SStreamTask* pTask, SStreamScanHistoryReq* pReq, int8_t igUntreated); + +static void streamTaskSetReady(SStreamTask* pTask, int32_t numOfReqs) { + ASSERT(pTask->status.downstreamReady == 0); + pTask->status.downstreamReady = 1; + + int64_t el = (taosGetTimestampMs() - pTask->tsInfo.init); + qDebug("s-task:%s all %d downstream ready, init completed, elapsed time:%dms, task status:%s", + pTask->id.idStr, numOfReqs, (int32_t) el, streamGetTaskStatusStr(pTask->status.taskStatus)); +} + +int32_t streamStartScanHistoryAsync(SStreamTask* pTask, int8_t igUntreated) { SStreamScanHistoryReq req; - streamBuildSourceRecover1Req(pTask, &req, igUntreated); - int32_t len = sizeof(SStreamScanHistoryReq); + initScanHistoryReq(pTask, &req, igUntreated); + int32_t len = sizeof(SStreamScanHistoryReq); void* serializedReq = rpcMallocCont(len); if (serializedReq == NULL) { return -1; @@ -50,14 +69,12 @@ const char* streamGetTaskStatusStr(int32_t status) { static int32_t doLaunchScanHistoryTask(SStreamTask* pTask) { SVersionRange* pRange = &pTask->dataRange.range; + if (pTask->info.fillHistory) { + streamSetParamForScanHistory(pTask); + } - qDebug("s-task:%s vgId:%d status:%s, start scan-history-data task, verRange:%" PRId64 " - %" PRId64, 
pTask->id.idStr, - pTask->info.nodeId, streamGetTaskStatusStr(pTask->status.taskStatus), pRange->minVer, pRange->maxVer); - - streamSetParamForScanHistory(pTask); streamSetParamForStreamScannerStep1(pTask, pRange, &pTask->dataRange.window); - - int32_t code = streamStartRecoverTask(pTask, 0); + int32_t code = streamStartScanHistoryAsync(pTask, 0); return code; } @@ -72,19 +89,20 @@ int32_t streamTaskLaunchScanHistory(SStreamTask* pTask) { walReaderGetCurrentVer(pTask->exec.pWalReader)); } } else if (pTask->info.taskLevel == TASK_LEVEL__AGG) { - streamSetStatusNormal(pTask); - streamSetParamForScanHistory(pTask); - streamAggScanHistoryPrepare(pTask); + if (pTask->info.fillHistory) { + streamSetParamForScanHistory(pTask); + } + streamTaskEnablePause(pTask); + streamTaskScanHistoryPrepare(pTask); } else if (pTask->info.taskLevel == TASK_LEVEL__SINK) { - streamSetStatusNormal(pTask); - qDebug("s-task:%s sink task convert to normal immediately", pTask->id.idStr); + qDebug("s-task:%s sink task do nothing to handle scan-history", pTask->id.idStr); + streamTaskScanHistoryPrepare(pTask); } - return 0; } // check status -int32_t streamTaskCheckDownstreamTasks(SStreamTask* pTask) { +int32_t streamTaskDoCheckDownstreamTasks(SStreamTask* pTask) { SHistDataRange* pRange = &pTask->dataRange; STimeWindow* pWindow = &pRange->window; @@ -96,7 +114,7 @@ int32_t streamTaskCheckDownstreamTasks(SStreamTask* pTask) { }; // serialize - if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { + if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) { req.reqId = tGenIdPI64(); req.downstreamNodeId = pTask->fixedEpDispatcher.nodeId; req.downstreamTaskId = pTask->fixedEpDispatcher.taskId; @@ -108,7 +126,7 @@ int32_t streamTaskCheckDownstreamTasks(SStreamTask* pTask) { pWindow->skey, pWindow->ekey, req.reqId); streamDispatchCheckMsg(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet); - } else if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { + } else if 
(pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; int32_t numOfVgs = taosArrayGetSize(vgInfo); @@ -129,11 +147,13 @@ int32_t streamTaskCheckDownstreamTasks(SStreamTask* pTask) { streamDispatchCheckMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet); } } else { - pTask->status.downstreamReady = 1; - qDebug("s-task:%s (vgId:%d) no downstream tasks, set downstream checked, try to launch scan-history-data, status:%s", - pTask->id.idStr, pTask->info.nodeId, streamGetTaskStatusStr(pTask->status.taskStatus)); + qDebug("s-task:%s (vgId:%d) set downstream ready, since no downstream", pTask->id.idStr, pTask->info.nodeId); + streamTaskSetReady(pTask, 0); + streamTaskSetRangeStreamCalc(pTask); streamTaskLaunchScanHistory(pTask); + + launchFillHistoryTask(pTask); } return 0; @@ -153,9 +173,9 @@ int32_t streamRecheckDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp* p qDebug("s-task:%s (vgId:%d) check downstream task:0x%x (vgId:%d) (recheck)", pTask->id.idStr, pTask->info.nodeId, req.downstreamTaskId, req.downstreamNodeId); - if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { + if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) { streamDispatchCheckMsg(pTask, &req, pRsp->downstreamNodeId, &pTask->fixedEpDispatcher.epSet); - } else if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { + } else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; int32_t numOfVgs = taosArrayGetSize(vgInfo); @@ -171,7 +191,28 @@ int32_t streamRecheckDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp* p } int32_t streamTaskCheckStatus(SStreamTask* pTask) { - return atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__NORMAL? 1:0; + return (pTask->status.downstreamReady == 1)? 
1:0; +} + +static void doProcessDownstreamReadyRsp(SStreamTask* pTask, int32_t numOfReqs) { + streamTaskSetReady(pTask, numOfReqs); + const char* id = pTask->id.idStr; + + int8_t status = pTask->status.taskStatus; + const char* str = streamGetTaskStatusStr(status); + + ASSERT(status == TASK_STATUS__SCAN_HISTORY || status == TASK_STATUS__NORMAL); + streamTaskSetRangeStreamCalc(pTask); + + if (status == TASK_STATUS__SCAN_HISTORY) { + qDebug("s-task:%s enter into scan-history data stage, status:%s", id, str); + streamTaskLaunchScanHistory(pTask); + } else { + qDebug("s-task:%s downstream tasks are ready, now ready for data from wal, status:%s", id, str); + } + + // when current stream task is ready, check the related fill history task. + launchFillHistoryTask(pTask); } int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp) { @@ -179,7 +220,7 @@ int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRs const char* id = pTask->id.idStr; if (pRsp->status == 1) { - if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { + if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { bool found = false; int32_t numOfReqs = taosArrayGetSize(pTask->checkReqIds); @@ -201,41 +242,20 @@ int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRs if (left == 0) { taosArrayDestroy(pTask->checkReqIds); pTask->checkReqIds = NULL; - pTask->status.downstreamReady = 1; - - if (pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY) { - qDebug("s-task:%s all %d downstream tasks are ready, now enter into scan-history-data stage, status:%s", id, - numOfReqs, streamGetTaskStatusStr(pTask->status.taskStatus)); - streamTaskLaunchScanHistory(pTask); - } else { - ASSERT(pTask->status.taskStatus == TASK_STATUS__NORMAL); - qDebug("s-task:%s fixed downstream task is ready, now ready for data from wal, status:%s", id, - streamGetTaskStatusStr(pTask->status.taskStatus)); - } + + doProcessDownstreamReadyRsp(pTask, numOfReqs); 
} else { int32_t total = taosArrayGetSize(pTask->shuffleDispatcher.dbInfo.pVgroupInfos); qDebug("s-task:%s (vgId:%d) recv check rsp from task:0x%x (vgId:%d) status:%d, total:%d not ready:%d", id, pRsp->upstreamNodeId, pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->status, total, left); } } else { - ASSERT(pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH); + ASSERT(pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH); if (pRsp->reqId != pTask->checkReqId) { return -1; } - // set the downstream tasks have been checked flag - ASSERT(pTask->status.downstreamReady == 0); - pTask->status.downstreamReady = 1; - - ASSERT(pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY || pTask->status.taskStatus == TASK_STATUS__NORMAL); - if (pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY) { - qDebug("s-task:%s fixed downstream task is ready, now enter into scan-history-data stage, status:%s", id, - streamGetTaskStatusStr(pTask->status.taskStatus)); - streamTaskLaunchScanHistory(pTask); - } else { - qDebug("s-task:%s fixed downstream task is ready, ready for data from inputQ, status:%s", id, - streamGetTaskStatusStr(pTask->status.taskStatus)); - } + doProcessDownstreamReadyRsp(pTask, 1); } } else { // not ready, wait for 100ms and retry qDebug("s-task:%s downstream taskId:0x%x (vgId:%d) not ready, wait for 100ms and retry", id, pRsp->downstreamTaskId, @@ -248,14 +268,40 @@ int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRs return 0; } +int32_t streamSendCheckRsp(const SStreamMeta* pMeta, const SStreamTaskCheckReq* pReq, SStreamTaskCheckRsp* pRsp, + SRpcHandleInfo *pRpcInfo, int32_t taskId) { + SEncoder encoder; + int32_t code; + int32_t len; + + tEncodeSize(tEncodeStreamTaskCheckRsp, pRsp, len, code); + if (code < 0) { + qError("vgId:%d failed to encode task check rsp, s-task:0x%x", pMeta->vgId, taskId); + return -1; + } + + void* buf = rpcMallocCont(sizeof(SMsgHead) + len); + ((SMsgHead*)buf)->vgId = htonl(pReq->upstreamNodeId); + + 
void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); + tEncoderInit(&encoder, (uint8_t*)abuf, len); + tEncodeStreamTaskCheckRsp(&encoder, pRsp); + tEncoderClear(&encoder); + + SRpcMsg rspMsg = {.code = 0, .pCont = buf, .contLen = sizeof(SMsgHead) + len, .info = *pRpcInfo}; + + tmsgSendRsp(&rspMsg); + return 0; +} + // common int32_t streamSetParamForScanHistory(SStreamTask* pTask) { - qDebug("s-task:%s set operator option for scan-history-data", pTask->id.idStr); + qDebug("s-task:%s set operator option for scan-history data", pTask->id.idStr); return qSetStreamOperatorOptionForScanHistory(pTask->exec.pExecutor); } int32_t streamRestoreParam(SStreamTask* pTask) { - qDebug("s-task:%s restore operator param after scan-history-data", pTask->id.idStr); + qDebug("s-task:%s restore operator param after scan-history", pTask->id.idStr); return qRestoreStreamOperatorOption(pTask->exec.pExecutor); } @@ -280,7 +326,7 @@ int32_t streamSetParamForStreamScannerStep2(SStreamTask* pTask, SVersionRange *p return qStreamSourceScanParamForHistoryScanStep2(pTask->exec.pExecutor, pVerRange, pWindow); } -int32_t streamBuildSourceRecover1Req(SStreamTask* pTask, SStreamScanHistoryReq* pReq, int8_t igUntreated) { +int32_t initScanHistoryReq(SStreamTask* pTask, SStreamScanHistoryReq* pReq, int8_t igUntreated) { pReq->msgHead.vgId = pTask->info.nodeId; pReq->streamId = pTask->id.streamId; pReq->taskId = pTask->id.taskId; @@ -293,23 +339,33 @@ int32_t streamSourceScanHistoryData(SStreamTask* pTask) { } int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask) { - SStreamScanHistoryFinishReq req = { .streamId = pTask->id.streamId, .childId = pTask->info.selfChildId }; + SStreamScanHistoryFinishReq req = { + .streamId = pTask->id.streamId, + .childId = pTask->info.selfChildId, + .upstreamTaskId = pTask->id.taskId, + .upstreamNodeId = pTask->pMeta->vgId, + }; // serialize - if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { - req.taskId = pTask->fixedEpDispatcher.taskId; + if 
(pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) { + req.downstreamTaskId = pTask->fixedEpDispatcher.taskId; + pTask->notReadyTasks = 1; streamDoDispatchScanHistoryFinishMsg(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet); - } else if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { + } else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; int32_t numOfVgs = taosArrayGetSize(vgInfo); + pTask->notReadyTasks = numOfVgs; - qDebug("s-task:%s send scan-history-data complete msg to downstream (shuffle-dispatch) %d tasks, status:%s", pTask->id.idStr, + qDebug("s-task:%s send scan-history data complete msg to downstream (shuffle-dispatch) %d tasks, status:%s", pTask->id.idStr, numOfVgs, streamGetTaskStatusStr(pTask->status.taskStatus)); for (int32_t i = 0; i < numOfVgs; i++) { SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); - req.taskId = pVgInfo->taskId; + req.downstreamTaskId = pVgInfo->taskId; streamDoDispatchScanHistoryFinishMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet); } + } else { + qDebug("s-task:%s no downstream tasks, invoke scan-history finish rsp directly", pTask->id.idStr); + streamProcessScanHistoryFinishRsp(pTask); } return 0; @@ -353,7 +409,7 @@ static int32_t doDispatchTransferMsg(SStreamTask* pTask, const SStreamTransferRe tmsgSendReq(pEpSet, &msg); qDebug("s-task:%s level:%d, status:%s dispatch transfer state msg to taskId:0x%x (vgId:%d)", pTask->id.idStr, - pTask->info.taskLevel, streamGetTaskStatusStr(pTask->status.taskStatus), pReq->taskId, vgId); + pTask->info.taskLevel, streamGetTaskStatusStr(pTask->status.taskStatus), pReq->downstreamTaskId, vgId); return 0; } @@ -362,16 +418,16 @@ int32_t streamDispatchTransferStateMsg(SStreamTask* pTask) { SStreamTransferReq req = { .streamId = pTask->id.streamId, .childId = pTask->info.selfChildId }; // serialize - if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { - req.taskId = 
pTask->fixedEpDispatcher.taskId; + if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) { + req.downstreamTaskId = pTask->fixedEpDispatcher.taskId; doDispatchTransferMsg(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet); - } else if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { + } else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; int32_t numOfVgs = taosArrayGetSize(vgInfo); for (int32_t i = 0; i < numOfVgs; i++) { SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); - req.taskId = pVgInfo->taskId; + req.downstreamTaskId = pVgInfo->taskId; doDispatchTransferMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet); } } @@ -380,16 +436,17 @@ int32_t streamDispatchTransferStateMsg(SStreamTask* pTask) { } // agg -int32_t streamAggScanHistoryPrepare(SStreamTask* pTask) { +int32_t streamTaskScanHistoryPrepare(SStreamTask* pTask) { pTask->numOfWaitingUpstream = taosArrayGetSize(pTask->pUpstreamEpInfoList); - qDebug("s-task:%s agg task is ready and wait for %d upstream tasks complete scan-history procedure", pTask->id.idStr, - pTask->numOfWaitingUpstream); + qDebug("s-task:%s level:%d task wait for %d upstream tasks complete scan-history procedure, status:%s", + pTask->id.idStr, pTask->info.taskLevel, pTask->numOfWaitingUpstream, + streamGetTaskStatusStr(pTask->status.taskStatus)); return 0; } int32_t streamAggUpstreamScanHistoryFinish(SStreamTask* pTask) { void* exec = pTask->exec.pExecutor; - if (qRestoreStreamOperatorOption(exec) < 0) { + if (pTask->info.fillHistory && qRestoreStreamOperatorOption(exec) < 0) { return -1; } @@ -399,33 +456,70 @@ int32_t streamAggUpstreamScanHistoryFinish(SStreamTask* pTask) { return 0; } -int32_t streamProcessScanHistoryFinishReq(SStreamTask* pTask, int32_t taskId, int32_t childId) { - if (pTask->info.taskLevel == TASK_LEVEL__AGG) { - int32_t left = atomic_sub_fetch_32(&pTask->numOfWaitingUpstream, 1); - ASSERT(left >= 0); 
+int32_t streamProcessScanHistoryFinishReq(SStreamTask* pTask, SStreamScanHistoryFinishReq* pReq, + SRpcHandleInfo* pRpcInfo) { + int32_t taskLevel = pTask->info.taskLevel; + ASSERT(taskLevel == TASK_LEVEL__AGG || taskLevel == TASK_LEVEL__SINK); + + // sink node do not send end of scan history msg to its upstream, which is agg task. + streamAddEndScanHistoryMsg(pTask, pRpcInfo, pReq); - if (left == 0) { - int32_t numOfTasks = taosArrayGetSize(pTask->pUpstreamEpInfoList); - qDebug("s-task:%s all %d upstream tasks finish scan-history data, set param for agg task for stream data", - pTask->id.idStr, numOfTasks); + int32_t left = atomic_sub_fetch_32(&pTask->numOfWaitingUpstream, 1); + ASSERT(left >= 0); + if (left == 0) { + int32_t numOfTasks = taosArrayGetSize(pTask->pUpstreamEpInfoList); + qDebug( + "s-task:%s all %d upstream tasks finish scan-history data, set param for agg task for stream data and send " + "rsp to all upstream tasks", + pTask->id.idStr, numOfTasks); + + if (pTask->info.taskLevel == TASK_LEVEL__AGG) { streamAggUpstreamScanHistoryFinish(pTask); - } else { - qDebug("s-task:%s receive scan-history data finish msg from upstream:0x%x(index:%d), unfinished:%d", - pTask->id.idStr, taskId, childId, left); } + streamNotifyUpstreamContinue(pTask); + + // sink node does not receive the pause msg from mnode, so does not need enable it + if (pTask->info.taskLevel == TASK_LEVEL__AGG) { + streamTaskEnablePause(pTask); + } + } else { + qDebug("s-task:%s receive scan-history data finish msg from upstream:0x%x(index:%d), unfinished:%d", + pTask->id.idStr, pReq->upstreamTaskId, pReq->childId, left); } return 0; } +int32_t streamProcessScanHistoryFinishRsp(SStreamTask* pTask) { + ASSERT(pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY); + SStreamMeta* pMeta = pTask->pMeta; + + // execute in the scan history complete call back msg, ready to process data from inputQ + streamSetStatusNormal(pTask); + atomic_store_8(&pTask->status.schedStatus, 
TASK_SCHED_STATUS__INACTIVE); + + taosWLockLatch(&pMeta->lock); + streamMetaSaveTask(pMeta, pTask); + taosWUnLockLatch(&pMeta->lock); + + // history data scan in the stream time window finished, now let's enable the pause + streamTaskEnablePause(pTask); + + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { + streamSchedExec(pTask); + } + + return TSDB_CODE_SUCCESS; +} + static void doCheckDownstreamStatus(SStreamTask* pTask, SStreamTask* pHTask) { pHTask->dataRange.range.minVer = 0; pHTask->dataRange.range.maxVer = pTask->chkInfo.currentVer; if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { - qDebug("s-task:%s set the launch condition for fill history s-task:%s, window:%" PRId64 " - %" PRId64 + qDebug("s-task:%s set the launch condition for fill-history s-task:%s, window:%" PRId64 " - %" PRId64 " ver range:%" PRId64 " - %" PRId64, pTask->id.idStr, pHTask->id.idStr, pHTask->dataRange.window.skey, pHTask->dataRange.window.ekey, pHTask->dataRange.range.minVer, pHTask->dataRange.range.maxVer); @@ -434,14 +528,9 @@ static void doCheckDownstreamStatus(SStreamTask* pTask, SStreamTask* pHTask) { } // check if downstream tasks have been ready - streamTaskCheckDownstreamTasks(pHTask); + streamTaskDoCheckDownstreamTasks(pHTask); } -typedef struct SStreamTaskRetryInfo { - SStreamMeta* pMeta; - int32_t taskId; -} SStreamTaskRetryInfo; - static void tryLaunchHistoryTask(void* param, void* tmrId) { SStreamTaskRetryInfo* pInfo = param; SStreamMeta* pMeta = pInfo->pMeta; @@ -500,7 +589,7 @@ static void tryLaunchHistoryTask(void* param, void* tmrId) { // todo fix the bug: 2. race condition // an fill history task needs to be started. 
-int32_t streamCheckHistoryTaskDownstream(SStreamTask* pTask) { +int32_t streamLaunchFillHistoryTask(SStreamTask* pTask) { SStreamMeta* pMeta = pTask->pMeta; int32_t hTaskId = pTask->historyTaskId.taskId; @@ -538,72 +627,55 @@ int32_t streamCheckHistoryTaskDownstream(SStreamTask* pTask) { } int32_t streamTaskScanHistoryDataComplete(SStreamTask* pTask) { - SStreamMeta* pMeta = pTask->pMeta; if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) { return 0; } // restore param - int32_t code = streamRestoreParam(pTask); - if (code < 0) { - return -1; + int32_t code = 0; + if (pTask->info.fillHistory) { + code = streamRestoreParam(pTask); + if (code < 0) { + return -1; + } } - // dispatch recover finish req to all related downstream task + // dispatch scan-history finish req to all related downstream task code = streamDispatchScanHistoryFinishMsg(pTask); if (code < 0) { return -1; } - ASSERT(pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY); - - // ready to process data from inputQ - streamSetStatusNormal(pTask); - atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); - - taosWLockLatch(&pMeta->lock); - streamMetaSaveTask(pMeta, pTask); - taosWUnLockLatch(&pMeta->lock); - return 0; } -bool streamTaskRecoverScanStep1Finished(SStreamTask* pTask) { - void* exec = pTask->exec.pExecutor; - return qStreamRecoverScanStep1Finished(exec); -} - -bool streamTaskRecoverScanStep2Finished(SStreamTask* pTask) { - void* exec = pTask->exec.pExecutor; - return qStreamRecoverScanStep2Finished(exec); -} - -int32_t streamTaskRecoverSetAllStepFinished(SStreamTask* pTask) { +int32_t streamTaskFillHistoryFinished(SStreamTask* pTask) { void* exec = pTask->exec.pExecutor; - return qStreamRecoverSetAllStepFinished(exec); + return qStreamInfoResetTimewindowFilter(exec); } -void streamHistoryTaskSetVerRangeStep2(SStreamTask* pTask) { +bool streamHistoryTaskSetVerRangeStep2(SStreamTask* pTask, int64_t latestVer) { SVersionRange* pRange = 
&pTask->dataRange.range; - int64_t latestVer = walReaderGetCurrentVer(pTask->exec.pWalReader); ASSERT(latestVer >= pRange->maxVer); int64_t nextStartVer = pRange->maxVer + 1; if (nextStartVer > latestVer - 1) { // no input data yet. no need to execute the secondardy scan while stream task halt - streamTaskRecoverSetAllStepFinished(pTask); + streamTaskFillHistoryFinished(pTask); qDebug( - "s-task:%s no need to perform secondary scan-history-data(step 2), since no data ingest during secondary scan", - pTask->id.idStr); + "s-task:%s no need to perform secondary scan-history data(step 2), since no data ingest during step1 scan, " + "related stream task currentVer:%" PRId64, + pTask->id.idStr, latestVer); + return true; } else { // 2. do secondary scan of the history data, the time window remain, and the version range is updated to // [pTask->dataRange.range.maxVer, ver1] pRange->minVer = nextStartVer; pRange->maxVer = latestVer - 1; + return false; } } - int32_t tEncodeStreamTaskCheckReq(SEncoder* pEncoder, const SStreamTaskCheckReq* pReq) { if (tStartEncode(pEncoder) < 0) return -1; if (tEncodeI64(pEncoder, pReq->reqId) < 0) return -1; @@ -661,54 +733,206 @@ int32_t tDecodeStreamTaskCheckRsp(SDecoder* pDecoder, SStreamTaskCheckRsp* pRsp) int32_t tEncodeStreamScanHistoryFinishReq(SEncoder* pEncoder, const SStreamScanHistoryFinishReq* pReq) { if (tStartEncode(pEncoder) < 0) return -1; if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1; - if (tEncodeI32(pEncoder, pReq->taskId) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->upstreamTaskId) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->upstreamNodeId) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->downstreamTaskId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->childId) < 0) return -1; tEndEncode(pEncoder); return pEncoder->pos; } + int32_t tDecodeStreamScanHistoryFinishReq(SDecoder* pDecoder, SStreamScanHistoryFinishReq* pReq) { if (tStartDecode(pDecoder) < 0) return -1; if (tDecodeI64(pDecoder, 
&pReq->streamId) < 0) return -1; - if (tDecodeI32(pDecoder, &pReq->taskId) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->upstreamTaskId) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->upstreamNodeId) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->downstreamTaskId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->childId) < 0) return -1; tEndDecode(pDecoder); return 0; } -// todo handle race condition, this task may be destroyed -void streamPrepareNdoCheckDownstream(SStreamTask* pTask) { - if (pTask->info.fillHistory) { - qDebug("s-task:%s fill history task, wait for being launched", pTask->id.idStr); - } else { - // calculate the correct start time window, and start the handle the history data for the main task. - if (pTask->historyTaskId.taskId != 0) { - // check downstream tasks for associated scan-history-data tasks - streamCheckHistoryTaskDownstream(pTask); - - // launch current task - SHistDataRange* pRange = &pTask->dataRange; - int64_t ekey = pRange->window.ekey + 1; - int64_t ver = pRange->range.minVer; - - pRange->window.skey = ekey; - pRange->window.ekey = INT64_MAX; - pRange->range.minVer = 0; - pRange->range.maxVer = ver; - - qDebug("s-task:%s level:%d fill-history task exists, update stream time window:%" PRId64 " - %" PRId64 - ", ver range:%" PRId64 " - %" PRId64, - pTask->id.idStr, pTask->info.taskLevel, pRange->window.skey, pRange->window.ekey, pRange->range.minVer, - pRange->range.maxVer); +void streamTaskSetRangeStreamCalc(SStreamTask* pTask) { + if (pTask->historyTaskId.taskId == 0) { + SHistDataRange* pRange = &pTask->dataRange; + if (pTask->info.fillHistory == 1) { + qDebug("s-task:%s fill-history task, time window:%" PRId64 "-%" PRId64 ", verRange:%" PRId64 + "-%" PRId64, + pTask->id.idStr, pRange->window.skey, pRange->window.ekey, pRange->range.minVer, pRange->range.maxVer); } else { - SHistDataRange* pRange = &pTask->dataRange; - qDebug("s-task:%s no associated scan-history task, stream time window:%" PRId64 " - %" PRId64 - ", ver 
range:%" PRId64 " - %" PRId64, + qDebug("s-task:%s no related fill-history task, stream time window:%" PRId64 "-%" PRId64 ", verRange:%" PRId64 + "-%" PRId64, pTask->id.idStr, pRange->window.skey, pRange->window.ekey, pRange->range.minVer, pRange->range.maxVer); } + } else { + SHistDataRange* pRange = &pTask->dataRange; + + int64_t ekey = 0; + if (pRange->window.ekey < INT64_MAX) { + ekey = pRange->window.ekey + 1; + } else { + ekey = pRange->window.ekey; + } + + int64_t ver = pRange->range.minVer; + + pRange->window.skey = ekey; + pRange->window.ekey = INT64_MAX; + pRange->range.minVer = 0; + pRange->range.maxVer = ver; + + qDebug("s-task:%s level:%d related fill-history task exists, update stream calc time window:%" PRId64 " - %" PRId64 + ", verRang:%" PRId64 " - %" PRId64, + pTask->id.idStr, pTask->info.taskLevel, pRange->window.skey, pRange->window.ekey, pRange->range.minVer, + pRange->range.maxVer); + } +} + +void launchFillHistoryTask(SStreamTask* pTask) { + int32_t tId = pTask->historyTaskId.taskId; + if (tId == 0) { + return; + } - ASSERT(pTask->status.downstreamReady == 0); + ASSERT(pTask->status.downstreamReady == 1); + qDebug("s-task:%s start to launch related fill-history task:0x%x", pTask->id.idStr, tId); - // check downstream tasks for itself - streamTaskCheckDownstreamTasks(pTask); + // launch associated fill history task + streamLaunchFillHistoryTask(pTask); +} + +void streamTaskCheckDownstreamTasks(SStreamTask* pTask) { + if (pTask->info.fillHistory) { + qDebug("s-task:%s fill history task, wait for being launched", pTask->id.idStr); + return; } + + ASSERT(pTask->status.downstreamReady == 0); + + // check downstream tasks for itself + streamTaskDoCheckDownstreamTasks(pTask); +} + +// normal -> pause, pause/stop/dropping -> pause, halt -> pause, scan-history -> pause +void streamTaskPause(SStreamTask* pTask) { + SStreamMeta* pMeta = pTask->pMeta; + + int64_t st = taosGetTimestampMs(); + + int8_t status = pTask->status.taskStatus; + if (status == 
TASK_STATUS__DROPPING) { + qDebug("vgId:%d s-task:%s task already dropped, do nothing", pMeta->vgId, pTask->id.idStr); + return; + } + + const char* str = streamGetTaskStatusStr(status); + if (status == TASK_STATUS__STOP || status == TASK_STATUS__PAUSE) { + qDebug("vgId:%d s-task:%s task already stopped/paused, status:%s, do nothing", pMeta->vgId, pTask->id.idStr, str); + return; + } + + while (!pTask->status.pauseAllowed || (pTask->status.taskStatus == TASK_STATUS__HALT)) { + status = pTask->status.taskStatus; + if (status == TASK_STATUS__DROPPING) { + qDebug("vgId:%d s-task:%s task already dropped, do nothing", pMeta->vgId, pTask->id.idStr); + return; + } + + if (status == TASK_STATUS__STOP || status == TASK_STATUS__PAUSE) { + qDebug("vgId:%d s-task:%s task already stopped/paused, status:%s, do nothing", pMeta->vgId, pTask->id.idStr, str); + return; + } + + const char* pStatus = streamGetTaskStatusStr(status); + qDebug("s-task:%s wait for the task can be paused, status:%s, vgId:%d", pTask->id.idStr, pStatus, pMeta->vgId); + taosMsleep(100); + } + + // todo: use the task lock, stead of meta lock + taosWLockLatch(&pMeta->lock); + + status = pTask->status.taskStatus; + if (status == TASK_STATUS__DROPPING || status == TASK_STATUS__STOP) { + taosWUnLockLatch(&pMeta->lock); + qDebug("vgId:%d s-task:%s task already dropped/stopped/paused, do nothing", pMeta->vgId, pTask->id.idStr); + return; + } + + atomic_store_8(&pTask->status.keepTaskStatus, pTask->status.taskStatus); + atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__PAUSE); + taosWUnLockLatch(&pMeta->lock); + + // in case of fill-history task, stop the tsdb file scan operation. 
+ if (pTask->info.fillHistory == 1) { + void* pExecutor = pTask->exec.pExecutor; + qKillTask(pExecutor, TSDB_CODE_SUCCESS); + } + + int64_t el = taosGetTimestampMs() - st; + qDebug("vgId:%d s-task:%s set pause flag, prev:%s, elapsed time:%dms", pMeta->vgId, pTask->id.idStr, + streamGetTaskStatusStr(pTask->status.keepTaskStatus), (int32_t)el); +} + +void streamTaskResume(SStreamTask* pTask) { + int8_t status = pTask->status.taskStatus; + if (status == TASK_STATUS__PAUSE) { + pTask->status.taskStatus = pTask->status.keepTaskStatus; + pTask->status.keepTaskStatus = TASK_STATUS__NORMAL; + qDebug("s-task:%s resume from pause", pTask->id.idStr); + } else { + qError("s-task:%s not in pause, failed to resume, status:%s", pTask->id.idStr, streamGetTaskStatusStr(status)); + } +} + +// todo fix race condition +void streamTaskDisablePause(SStreamTask* pTask) { + // pre-condition check + const char* id = pTask->id.idStr; + while (pTask->status.taskStatus == TASK_STATUS__PAUSE) { + qDebug("s-task:%s already in pause, wait for pause being cancelled, and set pause disabled, recheck in 100ms", id); + taosMsleep(100); + } + + qDebug("s-task:%s disable task pause", id); + pTask->status.pauseAllowed = 0; +} + +void streamTaskEnablePause(SStreamTask* pTask) { + qDebug("s-task:%s enable task pause", pTask->id.idStr); + pTask->status.pauseAllowed = 1; +} + +void streamTaskHalt(SStreamTask* pTask) { + int8_t status = pTask->status.taskStatus; + if (status == TASK_STATUS__DROPPING || status == TASK_STATUS__STOP) { + return; + } + + if (status == TASK_STATUS__HALT) { + return; + } + + // upgrade to halt status + if (status == TASK_STATUS__PAUSE) { + qDebug("s-task:%s upgrade status to %s from %s", pTask->id.idStr, streamGetTaskStatusStr(TASK_STATUS__HALT), + streamGetTaskStatusStr(TASK_STATUS__PAUSE)); + } else { + qDebug("s-task:%s halt task", pTask->id.idStr); + } + + pTask->status.keepTaskStatus = status; + pTask->status.taskStatus = TASK_STATUS__HALT; +} + +void 
streamTaskResumeFromHalt(SStreamTask* pTask) { + const char* id = pTask->id.idStr; + int8_t status = pTask->status.taskStatus; + if (status != TASK_STATUS__HALT) { + qError("s-task:%s not in halt status, status:%s", id, streamGetTaskStatusStr(status)); + return; + } + + pTask->status.taskStatus = pTask->status.keepTaskStatus; + pTask->status.keepTaskStatus = TASK_STATUS__NORMAL; + qDebug("s-task:%s resume from halt, current status:%s", id, streamGetTaskStatusStr(pTask->status.taskStatus)); } diff --git a/source/libs/stream/src/streamState.c b/source/libs/stream/src/streamState.c index 0a4f73a67c8afb319aa4da90b5f0253368d85dee..5b42be182c0a0abc24d587e0a986b9129fc6c416 100644 --- a/source/libs/stream/src/streamState.c +++ b/source/libs/stream/src/streamState.c @@ -729,6 +729,7 @@ void streamStateFreeVal(void* val) { int32_t streamStateSessionPut(SStreamState* pState, const SSessionKey* key, const void* value, int32_t vLen) { #ifdef USE_ROCKSDB + qDebug("===stream===save skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, key->win.skey,key->win.ekey, key->groupId); return streamStateSessionPut_rocksdb(pState, key, value, vLen); #else SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index ef83583ea4dd19f70599d9c0b45a00bcb0cf94ae..1eb8d119168d23b96a851e0946e968c4e1b1fef8 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -13,6 +13,8 @@ * along with this program. If not, see . 
*/ +#include +#include #include "executor.h" #include "tstream.h" #include "wal.h" @@ -44,7 +46,7 @@ SStreamTask* tNewStreamTask(int64_t streamId, int8_t taskLevel, int8_t fillHisto pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE; pTask->status.taskStatus = TASK_STATUS__SCAN_HISTORY; pTask->inputStatus = TASK_INPUT_STATUS__NORMAL; - pTask->outputStatus = TASK_OUTPUT_STATUS__NORMAL; + pTask->outputInfo.status = TASK_OUTPUT_STATUS__NORMAL; addToTaskset(pTaskList, pTask); return pTask; @@ -74,7 +76,7 @@ int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { if (tEncodeI32(pEncoder, pTask->id.taskId) < 0) return -1; if (tEncodeI32(pEncoder, pTask->info.totalLevel) < 0) return -1; if (tEncodeI8(pEncoder, pTask->info.taskLevel) < 0) return -1; - if (tEncodeI8(pEncoder, pTask->outputType) < 0) return -1; + if (tEncodeI8(pEncoder, pTask->outputInfo.type) < 0) return -1; if (tEncodeI16(pEncoder, pTask->msgInfo.msgType) < 0) return -1; if (tEncodeI8(pEncoder, pTask->status.taskStatus) < 0) return -1; @@ -109,19 +111,19 @@ int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { if (tEncodeCStr(pEncoder, pTask->exec.qmsg) < 0) return -1; } - if (pTask->outputType == TASK_OUTPUT__TABLE) { + if (pTask->outputInfo.type == TASK_OUTPUT__TABLE) { if (tEncodeI64(pEncoder, pTask->tbSink.stbUid) < 0) return -1; if (tEncodeCStr(pEncoder, pTask->tbSink.stbFullName) < 0) return -1; if (tEncodeSSchemaWrapper(pEncoder, pTask->tbSink.pSchemaWrapper) < 0) return -1; - } else if (pTask->outputType == TASK_OUTPUT__SMA) { + } else if (pTask->outputInfo.type == TASK_OUTPUT__SMA) { if (tEncodeI64(pEncoder, pTask->smaSink.smaId) < 0) return -1; - } else if (pTask->outputType == TASK_OUTPUT__FETCH) { + } else if (pTask->outputInfo.type == TASK_OUTPUT__FETCH) { if (tEncodeI8(pEncoder, pTask->fetchSink.reserved) < 0) return -1; - } else if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { + } else if (pTask->outputInfo.type == 
TASK_OUTPUT__FIXED_DISPATCH) { if (tEncodeI32(pEncoder, pTask->fixedEpDispatcher.taskId) < 0) return -1; if (tEncodeI32(pEncoder, pTask->fixedEpDispatcher.nodeId) < 0) return -1; if (tEncodeSEpSet(pEncoder, &pTask->fixedEpDispatcher.epSet) < 0) return -1; - } else if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { + } else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { if (tSerializeSUseDbRspImp(pEncoder, &pTask->shuffleDispatcher.dbInfo) < 0) return -1; if (tEncodeCStr(pEncoder, pTask->shuffleDispatcher.stbFullName) < 0) return -1; } @@ -137,7 +139,7 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { if (tDecodeI32(pDecoder, &pTask->id.taskId) < 0) return -1; if (tDecodeI32(pDecoder, &pTask->info.totalLevel) < 0) return -1; if (tDecodeI8(pDecoder, &pTask->info.taskLevel) < 0) return -1; - if (tDecodeI8(pDecoder, &pTask->outputType) < 0) return -1; + if (tDecodeI8(pDecoder, &pTask->outputInfo.type) < 0) return -1; if (tDecodeI16(pDecoder, &pTask->msgInfo.msgType) < 0) return -1; if (tDecodeI8(pDecoder, &pTask->status.taskStatus) < 0) return -1; @@ -179,21 +181,21 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { if (tDecodeCStrAlloc(pDecoder, &pTask->exec.qmsg) < 0) return -1; } - if (pTask->outputType == TASK_OUTPUT__TABLE) { + if (pTask->outputInfo.type == TASK_OUTPUT__TABLE) { if (tDecodeI64(pDecoder, &pTask->tbSink.stbUid) < 0) return -1; if (tDecodeCStrTo(pDecoder, pTask->tbSink.stbFullName) < 0) return -1; pTask->tbSink.pSchemaWrapper = taosMemoryCalloc(1, sizeof(SSchemaWrapper)); if (pTask->tbSink.pSchemaWrapper == NULL) return -1; if (tDecodeSSchemaWrapper(pDecoder, pTask->tbSink.pSchemaWrapper) < 0) return -1; - } else if (pTask->outputType == TASK_OUTPUT__SMA) { + } else if (pTask->outputInfo.type == TASK_OUTPUT__SMA) { if (tDecodeI64(pDecoder, &pTask->smaSink.smaId) < 0) return -1; - } else if (pTask->outputType == TASK_OUTPUT__FETCH) { + } else if (pTask->outputInfo.type == 
TASK_OUTPUT__FETCH) { if (tDecodeI8(pDecoder, &pTask->fetchSink.reserved) < 0) return -1; - } else if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { + } else if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) { if (tDecodeI32(pDecoder, &pTask->fixedEpDispatcher.taskId) < 0) return -1; if (tDecodeI32(pDecoder, &pTask->fixedEpDispatcher.nodeId) < 0) return -1; if (tDecodeSEpSet(pDecoder, &pTask->fixedEpDispatcher.epSet) < 0) return -1; - } else if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { + } else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { if (tDeserializeSUseDbRspImp(pDecoder, &pTask->shuffleDispatcher.dbInfo) < 0) return -1; if (tDecodeCStrTo(pDecoder, pTask->shuffleDispatcher.stbFullName) < 0) return -1; } @@ -203,16 +205,21 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { return 0; } +static void freeItem(void* p) { + SStreamContinueExecInfo* pInfo = p; + rpcFreeCont(pInfo->msg.pCont); +} + void tFreeStreamTask(SStreamTask* pTask) { - qDebug("free s-task:%s", pTask->id.idStr); + qDebug("free s-task:%s, %p", pTask->id.idStr, pTask); int32_t status = atomic_load_8((int8_t*)&(pTask->status.taskStatus)); if (pTask->inputQueue) { streamQueueClose(pTask->inputQueue); } - if (pTask->outputQueue) { - streamQueueClose(pTask->outputQueue); + if (pTask->outputInfo.queue) { + streamQueueClose(pTask->outputInfo.queue); } if (pTask->exec.qmsg) { @@ -229,11 +236,11 @@ void tFreeStreamTask(SStreamTask* pTask) { } taosArrayDestroyP(pTask->pUpstreamEpInfoList, taosMemoryFree); - if (pTask->outputType == TASK_OUTPUT__TABLE) { + if (pTask->outputInfo.type == TASK_OUTPUT__TABLE) { tDeleteSchemaWrapper(pTask->tbSink.pSchemaWrapper); taosMemoryFree(pTask->tbSink.pTSchema); tSimpleHashCleanup(pTask->tbSink.pTblInfo); - } else if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { + } else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { taosArrayDestroy(pTask->shuffleDispatcher.dbInfo.pVgroupInfos); 
taosArrayDestroy(pTask->checkReqIds); pTask->checkReqIds = NULL; @@ -251,5 +258,11 @@ void tFreeStreamTask(SStreamTask* pTask) { tSimpleHashCleanup(pTask->pNameMap); } + if (pTask->pRspMsgList != NULL) { + taosArrayDestroyEx(pTask->pRspMsgList, freeItem); + pTask->pRspMsgList = NULL; + } + + taosThreadMutexDestroy(&pTask->lock); taosMemoryFree(pTask); } diff --git a/source/libs/stream/src/streamUpdate.c b/source/libs/stream/src/streamUpdate.c index 85be120dbd562f2ce6526b391c937a362396b569..7a8de91d7735fb7b43a8fb65e747ff5aa7737723 100644 --- a/source/libs/stream/src/streamUpdate.c +++ b/source/libs/stream/src/streamUpdate.c @@ -33,7 +33,7 @@ static int64_t adjustExpEntries(int64_t entries) { return TMIN(DEFAULT_EXPECTED_ENTRIES, entries); } -static void windowSBfAdd(SUpdateInfo *pInfo, uint64_t count) { +void windowSBfAdd(SUpdateInfo *pInfo, uint64_t count) { if (pInfo->numSBFs < count) { count = pInfo->numSBFs; } @@ -49,7 +49,7 @@ static void clearItemHelper(void *p) { tScalableBfDestroy(*pBf); } -static void windowSBfDelete(SUpdateInfo *pInfo, uint64_t count) { +void windowSBfDelete(SUpdateInfo *pInfo, uint64_t count) { if (count < pInfo->numSBFs) { for (uint64_t i = 0; i < count; ++i) { SScalableBf *pTsSBFs = taosArrayGetP(pInfo->pTsSBFs, 0); diff --git a/source/libs/sync/inc/syncInt.h b/source/libs/sync/inc/syncInt.h index e79e7a3f75245ae9f4a52fc1f1cfa5cfce23ffc1..870cdd6a728fc676fe5a93777de80d16b90b92ec 100644 --- a/source/libs/sync/inc/syncInt.h +++ b/source/libs/sync/inc/syncInt.h @@ -213,7 +213,7 @@ typedef struct SSyncNode { int64_t minMatchIndex; int64_t startTime; - int64_t leaderTime; + int64_t roleTimeMs; int64_t lastReplicateTime; int32_t electNum; diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index f200212538aa396b41bd8baa5620660cedd02931..ace4a7c9c582266b07a6a23d5b7183e8aaa20c85 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -508,6 +508,7 @@ SSyncState syncGetState(int64_t rid) { 
SSyncNode* pSyncNode = syncNodeAcquire(rid); if (pSyncNode != NULL) { state.state = pSyncNode->state; + state.roleTimeMs = pSyncNode->roleTimeMs; state.restored = pSyncNode->restoreFinish; if (pSyncNode->vgId != 1) { state.canRead = syncNodeIsReadyForRead(pSyncNode); @@ -528,6 +529,7 @@ SSyncState syncGetState(int64_t rid) { pSyncNode->vgId, pSyncNode->pLogBuf->commitIndex, pSyncNode->pLogBuf->totalIndex, progress, state.progress); */ + state.term = raftStoreGetTerm(pSyncNode); syncNodeRelease(pSyncNode); } @@ -946,6 +948,7 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo, int32_t vnodeVersion) { // init TLA+ server vars pSyncNode->state = TAOS_SYNC_STATE_FOLLOWER; + pSyncNode->roleTimeMs = taosGetTimestampMs(); if (raftStoreOpen(pSyncNode) != 0) { sError("vgId:%d, failed to open raft store at path %s", pSyncNode->vgId, pSyncNode->raftStorePath); goto _error; @@ -1083,7 +1086,6 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo, int32_t vnodeVersion) { int64_t timeNow = taosGetTimestampMs(); pSyncNode->startTime = timeNow; - pSyncNode->leaderTime = timeNow; pSyncNode->lastReplicateTime = timeNow; // snapshotting @@ -1179,6 +1181,7 @@ int32_t syncNodeStart(SSyncNode* pSyncNode) { int32_t syncNodeStartStandBy(SSyncNode* pSyncNode) { // state change pSyncNode->state = TAOS_SYNC_STATE_FOLLOWER; + pSyncNode->roleTimeMs = taosGetTimestampMs(); syncNodeStopHeartbeatTimer(pSyncNode); // reset elect timer, long enough @@ -1715,6 +1718,7 @@ void syncNodeBecomeFollower(SSyncNode* pSyncNode, const char* debugStr) { // state change pSyncNode->state = TAOS_SYNC_STATE_FOLLOWER; + pSyncNode->roleTimeMs = taosGetTimestampMs(); syncNodeStopHeartbeatTimer(pSyncNode); // trace log @@ -1743,6 +1747,7 @@ void syncNodeBecomeLearner(SSyncNode* pSyncNode, const char* debugStr) { // state change pSyncNode->state = TAOS_SYNC_STATE_LEARNER; + pSyncNode->roleTimeMs = taosGetTimestampMs(); // trace log sNTrace(pSyncNode, "become learner %s", debugStr); @@ -1778,8 +1783,6 @@ void 
syncNodeBecomeLearner(SSyncNode* pSyncNode, const char* debugStr) { // /\ UNCHANGED <> // void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) { - pSyncNode->leaderTime = taosGetTimestampMs(); - pSyncNode->becomeLeaderNum++; pSyncNode->hbrSlowNum = 0; @@ -1788,6 +1791,7 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) { // state change pSyncNode->state = TAOS_SYNC_STATE_LEADER; + pSyncNode->roleTimeMs = taosGetTimestampMs(); // set leader cache pSyncNode->leaderCache = pSyncNode->myRaftId; @@ -1887,6 +1891,7 @@ int32_t syncNodePeerStateInit(SSyncNode* pSyncNode) { void syncNodeFollower2Candidate(SSyncNode* pSyncNode) { ASSERT(pSyncNode->state == TAOS_SYNC_STATE_FOLLOWER); pSyncNode->state = TAOS_SYNC_STATE_CANDIDATE; + pSyncNode->roleTimeMs = taosGetTimestampMs(); SyncIndex lastIndex = pSyncNode->pLogStore->syncLogLastIndex(pSyncNode->pLogStore); sInfo("vgId:%d, become candidate from follower. term:%" PRId64 ", commit index:%" PRId64 ", last index:%" PRId64, pSyncNode->vgId, raftStoreGetTerm(pSyncNode), pSyncNode->commitIndex, lastIndex); diff --git a/source/libs/sync/src/syncTimeout.c b/source/libs/sync/src/syncTimeout.c index 5ee67da9ab8d2cef07b7d85ef275199e8cf2ef0e..37166805cee2746313b8511776928ccb74255878 100644 --- a/source/libs/sync/src/syncTimeout.c +++ b/source/libs/sync/src/syncTimeout.c @@ -87,22 +87,6 @@ static int32_t syncNodeTimerRoutine(SSyncNode* ths) { } } - if (atomic_load_64(&ths->snapshottingIndex) != SYNC_INDEX_INVALID) { - // end timeout wal snapshot - if (timeNow - ths->snapshottingTime > SYNC_DEL_WAL_MS && - atomic_load_64(&ths->snapshottingIndex) != SYNC_INDEX_INVALID) { - SSyncLogStoreData* pData = ths->pLogStore->data; - int32_t code = walEndSnapshot(pData->pWal); - if (code != 0) { - sNError(ths, "timer wal snapshot end error since:%s", terrstr()); - return -1; - } else { - sNTrace(ths, "wal snapshot end, index:%" PRId64, atomic_load_64(&ths->snapshottingIndex)); - 
atomic_store_64(&ths->snapshottingIndex, SYNC_INDEX_INVALID); - } - } - } - if (!syncNodeIsMnode(ths)) { syncRespClean(ths->pSyncRespMgr); } diff --git a/source/libs/transport/src/thttp.c b/source/libs/transport/src/thttp.c index 04b546b36a48bd1df071c7b077d82c57c2ddea8a..c483d82027ae971da2644fb5c2ef8d2ee1f94c6c 100644 --- a/source/libs/transport/src/thttp.c +++ b/source/libs/transport/src/thttp.c @@ -391,7 +391,13 @@ static void httpHandleReq(SHttpMsg* msg) { // set up timeout to avoid stuck; int32_t fd = taosCreateSocketWithTimeout(5); - int ret = uv_tcp_open((uv_tcp_t*)&cli->tcp, fd); + if (fd < 0) { + tError("http-report failed to open socket, dst:%s:%d", cli->addr, cli->port); + taosReleaseRef(httpRefMgt, httpRef); + destroyHttpClient(cli); + return; + } + int ret = uv_tcp_open((uv_tcp_t*)&cli->tcp, fd); if (ret != 0) { tError("http-report failed to open socket, reason:%s, dst:%s:%d", uv_strerror(ret), cli->addr, cli->port); taosReleaseRef(httpRefMgt, httpRef); diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 8062a0618b64e4756fbcdfe9084cfde249a6e4f4..01223a2be96de797c4aaba8d5d6184e2c4116fcb 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -73,7 +73,7 @@ typedef struct SCliConn { SDelayTask* task; - char* ip; + char* dstAddr; char src[32]; char dst[32]; @@ -196,6 +196,7 @@ static FORCE_INLINE int32_t cliBuildExceptResp(SCliMsg* pMsg, STransMsg* resp); static FORCE_INLINE uint32_t cliGetIpFromFqdnCache(SHashObj* cache, char* fqdn); static FORCE_INLINE void cliUpdateFqdnCache(SHashObj* cache, char* fqdn); +static FORCE_INLINE void cliMayUpdateFqdnCache(SHashObj* cache, char* dst); // process data read from server, add decompress etc later static void cliHandleResp(SCliConn* conn); // handle except about conn @@ -543,6 +544,7 @@ void cliConnTimeout(uv_timer_t* handle) { taosArrayPush(pThrd->timerList, &conn->timer); conn->timer = NULL; + 
cliMayUpdateFqdnCache(pThrd->fqdn2ipCache, conn->dstAddr); cliHandleFastFail(conn, UV_ECANCELED); } void cliReadTimeoutCb(uv_timer_t* handle) { @@ -719,7 +721,7 @@ static void addConnToPool(void* pool, SCliConn* conn) { cliDestroyConnMsgs(conn, false); if (conn->list == NULL) { - conn->list = taosHashGet((SHashObj*)pool, conn->ip, strlen(conn->ip) + 1); + conn->list = taosHashGet((SHashObj*)pool, conn->dstAddr, strlen(conn->dstAddr) + 1); } SConnList* pList = conn->list; @@ -878,7 +880,7 @@ static void cliDestroyConn(SCliConn* conn, bool clear) { connList->list->numOfConn--; connList->size--; } else { - SConnList* connList = taosHashGet((SHashObj*)pThrd->pool, conn->ip, strlen(conn->ip) + 1); + SConnList* connList = taosHashGet((SHashObj*)pThrd->pool, conn->dstAddr, strlen(conn->dstAddr) + 1); if (connList != NULL) connList->list->numOfConn--; } conn->list = NULL; @@ -923,7 +925,7 @@ static void cliDestroy(uv_handle_t* handle) { transReleaseExHandle(transGetRefMgt(), conn->refId); transRemoveExHandle(transGetRefMgt(), conn->refId); - taosMemoryFree(conn->ip); + taosMemoryFree(conn->dstAddr); taosMemoryFree(conn->stream); cliDestroyConnMsgs(conn, true); @@ -1168,7 +1170,7 @@ static void cliHandleBatchReq(SCliBatch* pBatch, SCliThrd* pThrd) { if (conn == NULL) { conn = cliCreateConn(pThrd); conn->pBatch = pBatch; - conn->ip = taosStrdup(pList->dst); + conn->dstAddr = taosStrdup(pList->dst); uint32_t ipaddr = cliGetIpFromFqdnCache(pThrd->fqdn2ipCache, pList->ip); if (ipaddr == 0xffffffff) { @@ -1213,6 +1215,8 @@ static void cliHandleBatchReq(SCliBatch* pBatch, SCliThrd* pThrd) { conn->timer->data = NULL; taosArrayPush(pThrd->timerList, &conn->timer); conn->timer = NULL; + + cliMayUpdateFqdnCache(pThrd->fqdn2ipCache, conn->dstAddr); cliHandleFastFail(conn, -1); return; } @@ -1271,11 +1275,11 @@ static void cliHandleFastFail(SCliConn* pConn, int status) { STraceId* trace = &pMsg->msg.info.traceId; tGError("%s msg %s failed to send, conn %p failed to connect to %s, 
reason: %s", CONN_GET_INST_LABEL(pConn), - TMSG_INFO(pMsg->msg.msgType), pConn, pConn->ip, uv_strerror(status)); + TMSG_INFO(pMsg->msg.msgType), pConn, pConn->dstAddr, uv_strerror(status)); if (pMsg != NULL && REQUEST_NO_RESP(&pMsg->msg) && (pTransInst->failFastFp != NULL && pTransInst->failFastFp(pMsg->msg.msgType))) { - SFailFastItem* item = taosHashGet(pThrd->failFastCache, pConn->ip, strlen(pConn->ip) + 1); + SFailFastItem* item = taosHashGet(pThrd->failFastCache, pConn->dstAddr, strlen(pConn->dstAddr) + 1); int64_t cTimestamp = taosGetTimestampMs(); if (item != NULL) { int32_t elapse = cTimestamp - item->timestamp; @@ -1287,12 +1291,12 @@ static void cliHandleFastFail(SCliConn* pConn, int status) { } } else { SFailFastItem item = {.count = 1, .timestamp = cTimestamp}; - taosHashPut(pThrd->failFastCache, pConn->ip, strlen(pConn->ip) + 1, &item, sizeof(SFailFastItem)); + taosHashPut(pThrd->failFastCache, pConn->dstAddr, strlen(pConn->dstAddr) + 1, &item, sizeof(SFailFastItem)); } } } else { tError("%s batch msg failed to send, conn %p failed to connect to %s, reason: %s", CONN_GET_INST_LABEL(pConn), - pConn, pConn->ip, uv_strerror(status)); + pConn, pConn->dstAddr, uv_strerror(status)); cliDestroyBatch(pConn->pBatch); pConn->pBatch = NULL; } @@ -1314,6 +1318,7 @@ void cliConnCb(uv_connect_t* req, int status) { } if (status != 0) { + cliMayUpdateFqdnCache(pThrd->fqdn2ipCache, pConn->dstAddr); if (timeout == false) { cliHandleFastFail(pConn, status); } else if (timeout == true) { @@ -1483,9 +1488,34 @@ static FORCE_INLINE uint32_t cliGetIpFromFqdnCache(SHashObj* cache, char* fqdn) } static FORCE_INLINE void cliUpdateFqdnCache(SHashObj* cache, char* fqdn) { // impl later + uint32_t addr = taosGetIpv4FromFqdn(fqdn); + if (addr != 0xffffffff) { + uint32_t* v = taosHashGet(cache, fqdn, strlen(fqdn) + 1); + if (addr != *v) { + char old[64] = {0}, new[64] = {0}; + tinet_ntoa(old, *v); + tinet_ntoa(new, addr); + tWarn("update ip of fqdn:%s, old: %s, new: %s", fqdn, old, 
new); + taosHashPut(cache, fqdn, strlen(fqdn) + 1, &addr, sizeof(addr)); + } + } return; } +static void cliMayUpdateFqdnCache(SHashObj* cache, char* dst) { + if (dst == NULL) return; + + int16_t i = 0, len = strlen(dst); + for (i = len - 1; i >= 0; i--) { + if (dst[i] == ':') break; + } + if (i > 0) { + char fqdn[TSDB_FQDN_LEN + 1] = {0}; + memcpy(fqdn, dst, i); + cliUpdateFqdnCache(cache, fqdn); + } +} + static void doFreeTimeoutMsg(void* param) { STaskArg* arg = param; SCliMsg* pMsg = arg->param1; @@ -1560,7 +1590,7 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd) { transCtxMerge(&conn->ctx, &pMsg->ctx->appCtx); transQueuePush(&conn->cliMsgs, pMsg); - conn->ip = taosStrdup(addr); + conn->dstAddr = taosStrdup(addr); uint32_t ipaddr = cliGetIpFromFqdnCache(pThrd->fqdn2ipCache, fqdn); if (ipaddr == 0xffffffff) { @@ -1578,7 +1608,7 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd) { addr.sin_addr.s_addr = ipaddr; addr.sin_port = (uint16_t)htons(port); - tGTrace("%s conn %p try to connect to %s", pTransInst->label, conn, conn->ip); + tGTrace("%s conn %p try to connect to %s", pTransInst->label, conn, conn->dstAddr); pThrd->newConnCount++; int32_t fd = taosCreateSocketWithTimeout(TRANS_CONN_TIMEOUT * 4); if (fd == -1) { @@ -1608,6 +1638,7 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd) { taosArrayPush(pThrd->timerList, &conn->timer); conn->timer = NULL; + cliMayUpdateFqdnCache(pThrd->fqdn2ipCache, conn->dstAddr); cliHandleFastFail(conn, ret); return; } diff --git a/source/libs/wal/src/walRead.c b/source/libs/wal/src/walRead.c index 786f48ce88fc9a66ed3c05506108f4759aa4e8a8..038fbe444b925024cf9bd724c9dd5a50506419ff 100644 --- a/source/libs/wal/src/walRead.c +++ b/source/libs/wal/src/walRead.c @@ -70,17 +70,18 @@ int32_t walNextValidMsg(SWalReader *pReader) { int64_t fetchVer = pReader->curVersion; int64_t lastVer = walGetLastVer(pReader->pWal); int64_t committedVer = walGetCommittedVer(pReader->pWal); - int64_t appliedVer = walGetAppliedVer(pReader->pWal); 
+// int64_t appliedVer = walGetAppliedVer(pReader->pWal); - if(appliedVer < committedVer){ // wait apply ver equal to commit ver, otherwise may lost data when consume data [TD-24010] - wDebug("vgId:%d, wal apply ver:%"PRId64" smaller than commit ver:%"PRId64, pReader->pWal->cfg.vgId, appliedVer, committedVer); - } +// if(appliedVer < committedVer){ // wait apply ver equal to commit ver, otherwise may lost data when consume data [TD-24010] +// wDebug("vgId:%d, wal apply ver:%"PRId64" smaller than commit ver:%"PRId64, pReader->pWal->cfg.vgId, appliedVer, committedVer); +// } - int64_t endVer = TMIN(appliedVer, committedVer); +// int64_t endVer = TMIN(appliedVer, committedVer); + int64_t endVer = committedVer; wDebug("vgId:%d, wal start to fetch, index:%" PRId64 ", last index:%" PRId64 " commit index:%" PRId64 - ", applied index:%" PRId64", end index:%" PRId64, - pReader->pWal->cfg.vgId, fetchVer, lastVer, committedVer, appliedVer, endVer); + ", end index:%" PRId64, + pReader->pWal->cfg.vgId, fetchVer, lastVer, committedVer, endVer); if (fetchVer > endVer){ terrno = TSDB_CODE_WAL_LOG_NOT_EXIST; @@ -135,8 +136,8 @@ void walReaderVerifyOffset(SWalReader *pWalReader, STqOffsetVal* pOffset){ int64_t firstVer = walGetFirstVer((pWalReader)->pWal); taosThreadMutexUnlock(&pWalReader->pWal->mutex); - if (pOffset->version + 1 < firstVer){ - pOffset->version = firstVer - 1; + if (pOffset->version < firstVer){ + pOffset->version = firstVer; } } @@ -370,7 +371,7 @@ int32_t walFetchHead(SWalReader *pRead, int64_t ver, SWalCkHead *pHead) { pRead->pWal->vers.appliedVer); // TODO: valid ver - if (ver > pRead->pWal->vers.appliedVer) { + if (ver > pRead->pWal->vers.commitVer) { return -1; } diff --git a/source/os/src/osSysinfo.c b/source/os/src/osSysinfo.c index 6f87f6b75bebb53b0b835ce8c2bcaca64c686a46..5f73251e3b9dff37395198709269dc75c599f8a9 100644 --- a/source/os/src/osSysinfo.c +++ b/source/os/src/osSysinfo.c @@ -961,6 +961,18 @@ char *taosGetCmdlineByPID(int pid) { #endif } 
+int64_t taosGetOsUptime() { +#ifdef WINDOWS +#elif defined(_TD_DARWIN_64) +#else + struct sysinfo info; + if (0 == sysinfo(&info)) { + return (int64_t)info.uptime * 1000; + } +#endif + return 0; +} + void taosSetCoreDump(bool enable) { if (!enable) return; #ifdef WINDOWS diff --git a/source/os/src/osThread.c b/source/os/src/osThread.c index 39ba92fdc5e93d45facd85a2015461ad1fd68d8c..4c4e22bdd92846631026a74064f4f4d6a0a14a4f 100644 --- a/source/os/src/osThread.c +++ b/source/os/src/osThread.c @@ -17,6 +17,15 @@ #include #include "os.h" +#ifdef WINDOWS +#define THREAD_PTR_CHECK(p) \ + do { \ + if (!(p) || !(*(p))) return 0; \ + } while (0); +#else +#define THREAD_PTR_CHECK(p) +#endif + int32_t taosThreadCreate(TdThread *tid, const TdThreadAttr *attr, void *(*start)(void *), void *arg) { return pthread_create(tid, attr, start, arg); } @@ -83,9 +92,13 @@ int32_t taosThreadCondSignal(TdThreadCond *cond) { return pthread_cond_signal(co int32_t taosThreadCondBroadcast(TdThreadCond *cond) { return pthread_cond_broadcast(cond); } -int32_t taosThreadCondWait(TdThreadCond *cond, TdThreadMutex *mutex) { return pthread_cond_wait(cond, mutex); } +int32_t taosThreadCondWait(TdThreadCond *cond, TdThreadMutex *mutex) { + THREAD_PTR_CHECK(mutex) + return pthread_cond_wait(cond, mutex); +} int32_t taosThreadCondTimedWait(TdThreadCond *cond, TdThreadMutex *mutex, const struct timespec *abstime) { + THREAD_PTR_CHECK(mutex) return pthread_cond_timedwait(cond, mutex, abstime); } @@ -124,24 +137,37 @@ int32_t taosThreadKeyDelete(TdThreadKey key) { return pthread_key_delete(key); } int32_t taosThreadKill(TdThread thread, int32_t sig) { return pthread_kill(thread, sig); } // int32_t taosThreadMutexConsistent(TdThreadMutex* mutex) { +// THREAD_PTR_CHECK(mutex) // return pthread_mutex_consistent(mutex); // } -int32_t taosThreadMutexDestroy(TdThreadMutex *mutex) { return pthread_mutex_destroy(mutex); } +int32_t taosThreadMutexDestroy(TdThreadMutex *mutex) { + THREAD_PTR_CHECK(mutex) + return 
pthread_mutex_destroy(mutex); +} int32_t taosThreadMutexInit(TdThreadMutex *mutex, const TdThreadMutexAttr *attr) { return pthread_mutex_init(mutex, attr); } -int32_t taosThreadMutexLock(TdThreadMutex *mutex) { return pthread_mutex_lock(mutex); } +int32_t taosThreadMutexLock(TdThreadMutex *mutex) { + THREAD_PTR_CHECK(mutex) + return pthread_mutex_lock(mutex); +} // int32_t taosThreadMutexTimedLock(TdThreadMutex * mutex, const struct timespec *abstime) { // return pthread_mutex_timedlock(mutex, abstime); // } -int32_t taosThreadMutexTryLock(TdThreadMutex *mutex) { return pthread_mutex_trylock(mutex); } +int32_t taosThreadMutexTryLock(TdThreadMutex *mutex) { + THREAD_PTR_CHECK(mutex) + return pthread_mutex_trylock(mutex); +} -int32_t taosThreadMutexUnlock(TdThreadMutex *mutex) { return pthread_mutex_unlock(mutex); } +int32_t taosThreadMutexUnlock(TdThreadMutex *mutex) { + THREAD_PTR_CHECK(mutex) + return pthread_mutex_unlock(mutex); +} int32_t taosThreadMutexAttrDestroy(TdThreadMutexAttr *attr) { return pthread_mutexattr_destroy(attr); } @@ -224,6 +250,7 @@ int32_t taosThreadSetSchedParam(TdThread thread, int32_t policy, const struct sc int32_t taosThreadSetSpecific(TdThreadKey key, const void *value) { return pthread_setspecific(key, value); } int32_t taosThreadSpinDestroy(TdThreadSpinlock *lock) { + THREAD_PTR_CHECK(lock) #ifdef TD_USE_SPINLOCK_AS_MUTEX return pthread_mutex_destroy((pthread_mutex_t *)lock); #else @@ -242,6 +269,7 @@ int32_t taosThreadSpinInit(TdThreadSpinlock *lock, int32_t pshared) { } int32_t taosThreadSpinLock(TdThreadSpinlock *lock) { + THREAD_PTR_CHECK(lock) #ifdef TD_USE_SPINLOCK_AS_MUTEX return pthread_mutex_lock((pthread_mutex_t *)lock); #else @@ -250,6 +278,7 @@ int32_t taosThreadSpinLock(TdThreadSpinlock *lock) { } int32_t taosThreadSpinTrylock(TdThreadSpinlock *lock) { + THREAD_PTR_CHECK(lock) #ifdef TD_USE_SPINLOCK_AS_MUTEX return pthread_mutex_trylock((pthread_mutex_t *)lock); #else @@ -258,6 +287,7 @@ int32_t 
taosThreadSpinTrylock(TdThreadSpinlock *lock) { } int32_t taosThreadSpinUnlock(TdThreadSpinlock *lock) { + THREAD_PTR_CHECK(lock) #ifdef TD_USE_SPINLOCK_AS_MUTEX return pthread_mutex_unlock((pthread_mutex_t *)lock); #else diff --git a/source/util/CMakeLists.txt b/source/util/CMakeLists.txt index 6c9aff046c8dcd9930e98a1a10153e51d019cd58..9c2ed190c1f7f1f16936e6ef8ebac5289435c290 100644 --- a/source/util/CMakeLists.txt +++ b/source/util/CMakeLists.txt @@ -5,6 +5,13 @@ if (DEFINED GRANT_CFG_INCLUDE_DIR) add_definitions(-DGRANTS_CFG) endif() +IF (${ASSERT_NOT_CORE}) + ADD_DEFINITIONS(-DASSERT_NOT_CORE) + MESSAGE(STATUS "disable assert core") +ELSE () + MESSAGE(STATUS "enable assert core") +ENDIF (${ASSERT_NOT_CORE}) + target_include_directories( util PUBLIC "${TD_SOURCE_DIR}/include/util" diff --git a/source/util/src/tarray.c b/source/util/src/tarray.c index 8906391a9a9de3244e434137675f9ca5eff30405..f5e15e7436240e612e4dae3263acaa09363a38d1 100644 --- a/source/util/src/tarray.c +++ b/source/util/src/tarray.c @@ -191,7 +191,7 @@ void* taosArrayGet(const SArray* pArray, size_t index) { } if (index >= pArray->size) { - uError("index is out of range, current:%"PRIzu" max:%d", index, pArray->capacity); + uError("index is out of range, current:%" PRIzu " max:%d", index, pArray->capacity); return NULL; } @@ -221,7 +221,7 @@ size_t taosArrayGetSize(const SArray* pArray) { return TARRAY_SIZE(pArray); } -void* taosArrayInsert(SArray* pArray, size_t index, void* pData) { +void* taosArrayInsert(SArray* pArray, size_t index, const void* pData) { if (pArray == NULL || pData == NULL) { return NULL; } @@ -492,7 +492,7 @@ void* taosDecodeArray(const void* buf, SArray** pArray, FDecode decode, int32_t // order array void taosArraySortPWithExt(SArray* pArray, __ext_compar_fn_t fn, const void* param) { taosqsort(pArray->pData, pArray->size, pArray->elemSize, param, fn); -// taosArrayGetSize(pArray) > 8 ? 
taosArrayQuickSort(pArray, fn, param) : taosArrayInsertSort(pArray, fn, param); + // taosArrayGetSize(pArray) > 8 ? taosArrayQuickSort(pArray, fn, param) : taosArrayInsertSort(pArray, fn, param); } void taosArraySwap(SArray* a, SArray* b) { diff --git a/source/util/src/tconfig.c b/source/util/src/tconfig.c index 288ea6052b580909198788f528871633048e19f1..3681c7a423c82cbd343d3660cdd6432614a6c919 100644 --- a/source/util/src/tconfig.c +++ b/source/util/src/tconfig.c @@ -380,43 +380,43 @@ static int32_t cfgAddItem(SConfig *pCfg, SConfigItem *pItem, const char *name) { return 0; } -int32_t cfgAddBool(SConfig *pCfg, const char *name, bool defaultVal, bool tsc) { - SConfigItem item = {.dtype = CFG_DTYPE_BOOL, .bval = defaultVal, .tsc = tsc}; +int32_t cfgAddBool(SConfig *pCfg, const char *name, bool defaultVal, int8_t scope) { + SConfigItem item = {.dtype = CFG_DTYPE_BOOL, .bval = defaultVal, .scope = scope}; return cfgAddItem(pCfg, &item, name); } -int32_t cfgAddInt32(SConfig *pCfg, const char *name, int32_t defaultVal, int64_t minval, int64_t maxval, bool tsc) { +int32_t cfgAddInt32(SConfig *pCfg, const char *name, int32_t defaultVal, int64_t minval, int64_t maxval, int8_t scope) { if (defaultVal < minval || defaultVal > maxval) { terrno = TSDB_CODE_OUT_OF_RANGE; return -1; } - SConfigItem item = {.dtype = CFG_DTYPE_INT32, .i32 = defaultVal, .imin = minval, .imax = maxval, .tsc = tsc}; + SConfigItem item = {.dtype = CFG_DTYPE_INT32, .i32 = defaultVal, .imin = minval, .imax = maxval, .scope = scope}; return cfgAddItem(pCfg, &item, name); } -int32_t cfgAddInt64(SConfig *pCfg, const char *name, int64_t defaultVal, int64_t minval, int64_t maxval, bool tsc) { +int32_t cfgAddInt64(SConfig *pCfg, const char *name, int64_t defaultVal, int64_t minval, int64_t maxval, int8_t scope) { if (defaultVal < minval || defaultVal > maxval) { terrno = TSDB_CODE_OUT_OF_RANGE; return -1; } - SConfigItem item = {.dtype = CFG_DTYPE_INT64, .i64 = defaultVal, .imin = minval, .imax = maxval, .tsc 
= tsc}; + SConfigItem item = {.dtype = CFG_DTYPE_INT64, .i64 = defaultVal, .imin = minval, .imax = maxval, .scope = scope}; return cfgAddItem(pCfg, &item, name); } -int32_t cfgAddFloat(SConfig *pCfg, const char *name, float defaultVal, double minval, double maxval, bool tsc) { +int32_t cfgAddFloat(SConfig *pCfg, const char *name, float defaultVal, double minval, double maxval, int8_t scope) { if (defaultVal < minval || defaultVal > maxval) { terrno = TSDB_CODE_OUT_OF_RANGE; return -1; } - SConfigItem item = {.dtype = CFG_DTYPE_FLOAT, .fval = defaultVal, .fmin = minval, .fmax = maxval, .tsc = tsc}; + SConfigItem item = {.dtype = CFG_DTYPE_FLOAT, .fval = defaultVal, .fmin = minval, .fmax = maxval, .scope = scope}; return cfgAddItem(pCfg, &item, name); } -int32_t cfgAddString(SConfig *pCfg, const char *name, const char *defaultVal, bool tsc) { - SConfigItem item = {.dtype = CFG_DTYPE_STRING, .tsc = tsc}; +int32_t cfgAddString(SConfig *pCfg, const char *name, const char *defaultVal, int8_t scope) { + SConfigItem item = {.dtype = CFG_DTYPE_STRING, .scope = scope}; item.str = taosStrdup(defaultVal); if (item.str == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -425,8 +425,8 @@ int32_t cfgAddString(SConfig *pCfg, const char *name, const char *defaultVal, bo return cfgAddItem(pCfg, &item, name); } -int32_t cfgAddDir(SConfig *pCfg, const char *name, const char *defaultVal, bool tsc) { - SConfigItem item = {.dtype = CFG_DTYPE_DIR, .tsc = tsc}; +int32_t cfgAddDir(SConfig *pCfg, const char *name, const char *defaultVal, int8_t scope) { + SConfigItem item = {.dtype = CFG_DTYPE_DIR, .scope = scope}; if (cfgCheckAndSetDir(&item, defaultVal) != 0) { return -1; } @@ -434,8 +434,8 @@ int32_t cfgAddDir(SConfig *pCfg, const char *name, const char *defaultVal, bool return cfgAddItem(pCfg, &item, name); } -int32_t cfgAddLocale(SConfig *pCfg, const char *name, const char *defaultVal) { - SConfigItem item = {.dtype = CFG_DTYPE_LOCALE, .tsc = 1}; +int32_t cfgAddLocale(SConfig *pCfg, const 
char *name, const char *defaultVal, int8_t scope) { + SConfigItem item = {.dtype = CFG_DTYPE_LOCALE, .scope = scope}; if (cfgCheckAndSetLocale(&item, defaultVal) != 0) { return -1; } @@ -443,8 +443,8 @@ int32_t cfgAddLocale(SConfig *pCfg, const char *name, const char *defaultVal) { return cfgAddItem(pCfg, &item, name); } -int32_t cfgAddCharset(SConfig *pCfg, const char *name, const char *defaultVal) { - SConfigItem item = {.dtype = CFG_DTYPE_CHARSET, .tsc = 1}; +int32_t cfgAddCharset(SConfig *pCfg, const char *name, const char *defaultVal, int8_t scope) { + SConfigItem item = {.dtype = CFG_DTYPE_CHARSET, .scope = scope}; if (cfgCheckAndSetCharset(&item, defaultVal) != 0) { return -1; } @@ -452,8 +452,8 @@ int32_t cfgAddCharset(SConfig *pCfg, const char *name, const char *defaultVal) { return cfgAddItem(pCfg, &item, name); } -int32_t cfgAddTimezone(SConfig *pCfg, const char *name, const char *defaultVal) { - SConfigItem item = {.dtype = CFG_DTYPE_TIMEZONE, .tsc = 1}; +int32_t cfgAddTimezone(SConfig *pCfg, const char *name, const char *defaultVal, int8_t scope) { + SConfigItem item = {.dtype = CFG_DTYPE_TIMEZONE, .scope = scope}; if (cfgCheckAndSetTimezone(&item, defaultVal) != 0) { return -1; } @@ -543,6 +543,27 @@ void cfgDumpItemValue(SConfigItem *pItem, char *buf, int32_t bufSize, int32_t *p *pLen = len; } +void cfgDumpItemScope(SConfigItem *pItem, char *buf, int32_t bufSize, int32_t *pLen) { + int32_t len = 0; + switch (pItem->scope) { + case CFG_SCOPE_SERVER: + len = snprintf(buf, bufSize, "server"); + break; + case CFG_SCOPE_CLIENT: + len = snprintf(buf, bufSize, "client"); + break; + case CFG_SCOPE_BOTH: + len = snprintf(buf, bufSize, "both"); + break; + } + + if (len > bufSize) { + len = bufSize; + } + + *pLen = len; +} + void cfgDumpCfg(SConfig *pCfg, bool tsc, bool dump) { if (dump) { printf(" global config"); @@ -560,7 +581,7 @@ void cfgDumpCfg(SConfig *pCfg, bool tsc, bool dump) { int32_t size = taosArrayGetSize(pCfg->array); for (int32_t i = 0; i < 
size; ++i) { SConfigItem *pItem = taosArrayGet(pCfg->array, i); - if (tsc && !pItem->tsc) continue; + if (tsc && pItem->scope == CFG_SCOPE_SERVER) continue; if (dump && strcmp(pItem->name, "scriptDir") == 0) continue; if (dump && strcmp(pItem->name, "simDebugFlag") == 0) continue; tstrncpy(src, cfgStypeStr(pItem->stype), CFG_SRC_PRINT_LEN); diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 8f5fbc0844897bb97182bb9a756135d43965048a..b0b407e2a5bbe402a8c67330c5f1ae644e6094b3 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -632,12 +632,18 @@ TAOS_DEFINE_ERROR(TSDB_CODE_SCALAR_CONVERT_ERROR, "Cannot convert to s //tmq TAOS_DEFINE_ERROR(TSDB_CODE_TMQ_INVALID_MSG, "Invalid message") +TAOS_DEFINE_ERROR(TSDB_CODE_TMQ_NEED_INITIALIZED, "Assignment or poll interface need to be called first") TAOS_DEFINE_ERROR(TSDB_CODE_TMQ_SNAPSHOT_ERROR, "Can not operate in snapshot mode") +TAOS_DEFINE_ERROR(TSDB_CODE_TMQ_NO_COMMITTED, "No committed info") +TAOS_DEFINE_ERROR(TSDB_CODE_TMQ_VERSION_OUT_OF_RANGE, "Offset out of range") +TAOS_DEFINE_ERROR(TSDB_CODE_TMQ_INVALID_VGID, "VgId does not belong to this consumer") +TAOS_DEFINE_ERROR(TSDB_CODE_TMQ_INVALID_TOPIC, "Topic does not belong to this consumer") TAOS_DEFINE_ERROR(TSDB_CODE_TMQ_CONSUMER_MISMATCH, "Consumer mismatch") TAOS_DEFINE_ERROR(TSDB_CODE_TMQ_CONSUMER_CLOSED, "Consumer closed") TAOS_DEFINE_ERROR(TSDB_CODE_TMQ_CONSUMER_ERROR, "Consumer error, to see log") TAOS_DEFINE_ERROR(TSDB_CODE_TMQ_TOPIC_OUT_OF_RANGE, "Topic num out of range") TAOS_DEFINE_ERROR(TSDB_CODE_TMQ_GROUP_OUT_OF_RANGE, "Group num out of range 100") +TAOS_DEFINE_ERROR(TSDB_CODE_TMQ_SAME_COMMITTED_VALUE, "Same committed value") // stream TAOS_DEFINE_ERROR(TSDB_CODE_STREAM_TASK_NOT_EXIST, "Stream task not exist") diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c index c07bafa1ea27e723efd6fa8c4f0a045c649601fe..de7ad848ed5dd522bbab43bc9a40c921f1ee8d26 100644 --- a/source/util/src/tlog.c +++ 
b/source/util/src/tlog.c @@ -76,7 +76,11 @@ static int32_t tsDaylightActive; /* Currently in daylight saving time. */ bool tsLogEmbedded = 0; bool tsAsyncLog = true; +#ifdef ASSERT_NOT_CORE +bool tsAssert = false; +#else bool tsAssert = true; +#endif int32_t tsNumOfLogLines = 10000000; int32_t tsLogKeepDays = 0; LogFp tsLogFp = NULL; diff --git a/source/util/src/trbtree.c b/source/util/src/trbtree.c index e7386d5912dd83c5a76af3c902bcd910b5ffef87..e1000f7bc153176d76c676f81601f2a49f7d0213 100644 --- a/source/util/src/trbtree.c +++ b/source/util/src/trbtree.c @@ -105,7 +105,7 @@ static void tRBTreeTransplant(SRBTree *pTree, SRBTreeNode *u, SRBTreeNode *v) { v->parent = u->parent; } -static SRBTreeNode *tRBTreeSuccessor(SRBTree *pTree, SRBTreeNode *pNode) { +static SRBTreeNode *tRBTreeSuccessor(const SRBTree *pTree, SRBTreeNode *pNode) { if (pNode->right != pTree->NIL) { pNode = pNode->right; while (pNode->left != pTree->NIL) { @@ -125,7 +125,7 @@ static SRBTreeNode *tRBTreeSuccessor(SRBTree *pTree, SRBTreeNode *pNode) { return pNode; } -static SRBTreeNode *tRBTreePredecessor(SRBTree *pTree, SRBTreeNode *pNode) { +static SRBTreeNode *tRBTreePredecessor(const SRBTree *pTree, SRBTreeNode *pNode) { if (pNode->left != pTree->NIL) { pNode = pNode->left; while (pNode->right != pTree->NIL) { @@ -443,7 +443,7 @@ SRBTreeNode *tRBTreeDropMax(SRBTree *pTree) { return pNode; } -SRBTreeNode *tRBTreeGet(SRBTree *pTree, const SRBTreeNode *pKeyNode) { +SRBTreeNode *tRBTreeGet(const SRBTree *pTree, const SRBTreeNode *pKeyNode) { SRBTreeNode *pNode = pTree->root; while (pNode != pTree->NIL) { diff --git a/source/util/src/tutil.c b/source/util/src/tutil.c index 6d95660103e2c78203e7531af927e8d59ae4c358..6b6878ec83e9da35957c9c6aa5bbc6b37b1a404d 100644 --- a/source/util/src/tutil.c +++ b/source/util/src/tutil.c @@ -351,10 +351,10 @@ int32_t titoa(uint64_t val, size_t radix, char str[]) { int32_t i = 0; uint64_t v = val; - while(v > 0) { + do { buf[i++] = s[v % radix]; v /= radix; - } + } 
while (v > 0); // reverse order for(int32_t j = 0; j < i; ++j) { diff --git a/tests/develop-test/2-query/show_create_db.py b/tests/develop-test/2-query/show_create_db.py index d4bff819c990dfc09be3fb1127eb643a067a44fa..af31d7f03aa2aef4f533e3701b5f4bc7200c15ad 100644 --- a/tests/develop-test/2-query/show_create_db.py +++ b/tests/develop-test/2-query/show_create_db.py @@ -42,17 +42,17 @@ class TDTestCase: tdSql.query('show create database scd;') tdSql.checkRows(1) tdSql.checkData(0, 0, 'scd') - tdSql.checkData(0, 1, "CREATE DATABASE `scd` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 1 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0 TABLE_PREFIX 0 TABLE_SUFFIX 0 TSDB_PAGESIZE 4 WAL_RETENTION_PERIOD 0 WAL_RETENTION_SIZE 0") + tdSql.checkData(0, 1, "CREATE DATABASE `scd` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 2 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0 TABLE_PREFIX 0 TABLE_SUFFIX 0 TSDB_PAGESIZE 4 WAL_RETENTION_PERIOD 3600 WAL_RETENTION_SIZE 0") tdSql.query('show create database scd2;') tdSql.checkRows(1) tdSql.checkData(0, 0, 'scd2') - tdSql.checkData(0, 1, "CREATE DATABASE `scd2` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 3 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0 TABLE_PREFIX 0 TABLE_SUFFIX 0 TSDB_PAGESIZE 4 WAL_RETENTION_PERIOD 0 WAL_RETENTION_SIZE 0") + tdSql.checkData(0, 1, "CREATE DATABASE `scd2` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 3 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 
VGROUPS 2 SINGLE_STABLE 0 TABLE_PREFIX 0 TABLE_SUFFIX 0 TSDB_PAGESIZE 4 WAL_RETENTION_PERIOD 3600 WAL_RETENTION_SIZE 0") tdSql.query('show create database scd4') tdSql.checkRows(1) tdSql.checkData(0, 0, 'scd4') - tdSql.checkData(0, 1, "CREATE DATABASE `scd4` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 13 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0 TABLE_PREFIX 0 TABLE_SUFFIX 0 TSDB_PAGESIZE 4 WAL_RETENTION_PERIOD 0 WAL_RETENTION_SIZE 0") + tdSql.checkData(0, 1, "CREATE DATABASE `scd4` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 13 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0 TABLE_PREFIX 0 TABLE_SUFFIX 0 TSDB_PAGESIZE 4 WAL_RETENTION_PERIOD 3600 WAL_RETENTION_SIZE 0") self.restartTaosd(1, dbname='scd') @@ -60,17 +60,17 @@ class TDTestCase: tdSql.query('show create database scd;') tdSql.checkRows(1) tdSql.checkData(0, 0, 'scd') - tdSql.checkData(0, 1, "CREATE DATABASE `scd` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 1 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0 TABLE_PREFIX 0 TABLE_SUFFIX 0 TSDB_PAGESIZE 4 WAL_RETENTION_PERIOD 0 WAL_RETENTION_SIZE 0") + tdSql.checkData(0, 1, "CREATE DATABASE `scd` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 2 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0 TABLE_PREFIX 0 TABLE_SUFFIX 0 TSDB_PAGESIZE 4 WAL_RETENTION_PERIOD 3600 WAL_RETENTION_SIZE 0") tdSql.query('show create database scd2;') tdSql.checkRows(1) tdSql.checkData(0, 0, 'scd2') - 
tdSql.checkData(0, 1, "CREATE DATABASE `scd2` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 3 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0 TABLE_PREFIX 0 TABLE_SUFFIX 0 TSDB_PAGESIZE 4 WAL_RETENTION_PERIOD 0 WAL_RETENTION_SIZE 0") + tdSql.checkData(0, 1, "CREATE DATABASE `scd2` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 3 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0 TABLE_PREFIX 0 TABLE_SUFFIX 0 TSDB_PAGESIZE 4 WAL_RETENTION_PERIOD 3600 WAL_RETENTION_SIZE 0") tdSql.query('show create database scd4') tdSql.checkRows(1) tdSql.checkData(0, 0, 'scd4') - tdSql.checkData(0, 1, "CREATE DATABASE `scd4` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 13 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0 TABLE_PREFIX 0 TABLE_SUFFIX 0 TSDB_PAGESIZE 4 WAL_RETENTION_PERIOD 0 WAL_RETENTION_SIZE 0") + tdSql.checkData(0, 1, "CREATE DATABASE `scd4` BUFFER 256 CACHESIZE 1 CACHEMODEL 'none' COMP 2 DURATION 14400m WAL_FSYNC_PERIOD 3000 MAXROWS 4096 MINROWS 100 STT_TRIGGER 13 KEEP 5256000m,5256000m,5256000m PAGES 256 PAGESIZE 4 PRECISION 'ms' REPLICA 1 WAL_LEVEL 1 VGROUPS 2 SINGLE_STABLE 0 TABLE_PREFIX 0 TABLE_SUFFIX 0 TSDB_PAGESIZE 4 WAL_RETENTION_PERIOD 3600 WAL_RETENTION_SIZE 0") tdSql.execute('drop database scd') diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index 620748bc2672ebc6cdcc998de35a0af2b1ced77e..89572d1c068eea03023929f0ef6ba04dfeda9bb5 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -38,6 +38,7 @@ ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqClientConsLog.py 
,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqMaxGroupIds.py ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqConsumeDiscontinuousData.py +,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqOffset.py ,,n,system-test,python3 ./test.py -f 7-tmq/tmqDropConsumer.py @@ -104,7 +105,7 @@ ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqConsFromTsdb-mutilVg-mutilCtb-funcNFilter.py ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqConsFromTsdb-mutilVg-mutilCtb.py ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqConsFromTsdb1-1ctb-funcNFilter.py -#,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqConsFromTsdb1-mutilVg-mutilCtb-funcNFilter.py +,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqConsFromTsdb1-mutilVg-mutilCtb-funcNFilter.py ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqConsFromTsdb1-mutilVg-mutilCtb.py ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqAutoCreateTbl.py ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqDnodeRestart.py @@ -160,6 +161,7 @@ ,,n,system-test,python3 ./test.py -f 0-others/udfpy_main.py ,,n,system-test,python3 ./test.py -N 3 -f 0-others/walRetention.py ,,n,system-test,python3 ./test.py -f 0-others/splitVGroup.py -N 5 +,,n,system-test,python3 ./test.py -f 0-others/timeRangeWise.py -N 3 ,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/alter_database.py ,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/alter_replica.py -N 3 ,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/influxdb_line_taosc_insert.py @@ -342,6 +344,8 @@ ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/smaBasic.py -N 3 ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/smaTest.py ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/smaTest.py -R +,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/sma_index.py +,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/sml_TS-3724.py ,,y,system-test,./pytest.sh python3 ./test.py 
-f 2-query/sml.py ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/sml.py -R ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/spread.py @@ -447,7 +451,7 @@ ,,y,system-test,./pytest.sh python3 ./test.py -f 6-cluster/5dnode3mnodeRestartDnodeInsertDataAsync.py -N 6 -M 3 #,,y,system-test,./pytest.sh python3 ./test.py -f 6-cluster/5dnode3mnodeRestartDnodeInsertDataAsync.py -N 6 -M 3 -n 3 ,,n,system-test,python3 ./test.py -f 6-cluster/manually-test/6dnode3mnodeInsertLessDataAlterRep3to1to3.py -N 6 -M 3 - +#,,n,system-test,python ./test.py -f 6-cluster/5dnode3mnodeRoll.py -N 3 -C 1 ,,y,system-test,./pytest.sh python3 ./test.py -f 6-cluster/5dnode3mnodeAdd1Ddnoe.py -N 7 -M 3 -C 6 ,,y,system-test,./pytest.sh python3 ./test.py -f 6-cluster/5dnode3mnodeAdd1Ddnoe.py -N 7 -M 3 -C 6 -n 3 #,,y,system-test,./pytest.sh python3 ./test.py -f 6-cluster/5dnode3mnodeDrop.py -N 5 diff --git a/tests/pytest/util/dnodes.py b/tests/pytest/util/dnodes.py index 543433b4ea6a672f10fb341b113d1031f07a1fb2..89e3df81b938d666e670f9096638ad0da91fa569 100644 --- a/tests/pytest/util/dnodes.py +++ b/tests/pytest/util/dnodes.py @@ -545,6 +545,8 @@ class TDDnode: def stoptaosd(self): + tdLog.debug("start to stop taosd on dnode: %d "% (self.index)) + # print(self.asan,self.running,self.remoteIP,self.valgrind) if self.asan: stopCmd = "%s -s stop -n dnode%d" % (self.execPath, self.index) tdLog.info("execute script: " + stopCmd) diff --git a/tests/pytest/util/sql.py b/tests/pytest/util/sql.py index 2af8f721b68771231f63888ba6e622be30d53b81..2fa21b1983824de33b713d9b59008a69f9ffb8d3 100644 --- a/tests/pytest/util/sql.py +++ b/tests/pytest/util/sql.py @@ -440,8 +440,10 @@ class TDSql: time.sleep(1) continue - def execute(self, sql,queryTimes=30): + def execute(self, sql, queryTimes=30, show=False): self.sql = sql + if show: + tdLog.info(sql) i=1 while i <= queryTimes: try: diff --git a/tests/script/sh/stop_dnodes.bat b/tests/script/sh/stop_dnodes.bat index 
ab7af2ca92023745b8b712cb78e9b168a6c00598..65aee26ed4f575ce446ba02055713f69f6082191 100644 --- a/tests/script/sh/stop_dnodes.bat +++ b/tests/script/sh/stop_dnodes.bat @@ -3,4 +3,6 @@ rem echo taskkill /F /IM taosd.exe wmic process where "name='taosd.exe'" call terminate > NUL 2>&1 -taskkill /F /IM taosd.exe > NUL 2>&1 \ No newline at end of file +taskkill /F /IM taosd.exe > NUL 2>&1 + +rem echo taskkill /F /IM taosd.exe finished \ No newline at end of file diff --git a/tests/script/tsim/db/alter_option.sim b/tests/script/tsim/db/alter_option.sim index a16b39f50b3a3eec7144259ea5f81697a1d49dff..6c98d43794d5bb9a692e9a52e6a341e7e53df9f9 100644 --- a/tests/script/tsim/db/alter_option.sim +++ b/tests/script/tsim/db/alter_option.sim @@ -111,7 +111,7 @@ endi if $data21_db != 1000 then # wal_level fsyncperiod return -1 endi -if $data22_db != 0 then # wal_retention_period +if $data22_db != 3600 then # wal_retention_period return -1 endi if $data23_db != 0 then # wal_retention_size diff --git a/tests/script/tsim/parser/fill.sim b/tests/script/tsim/parser/fill.sim index a66e7d6ab72e0c85a85035565c2e8fd9da70b0af..0534aa5d5b762c24bb50bba9c52b6733a2d7b429 100644 --- a/tests/script/tsim/parser/fill.sim +++ b/tests/script/tsim/parser/fill.sim @@ -1224,4 +1224,104 @@ if $data42 != NULL then return -1 endi +print ===================== TD-3625 test fill value NULL +sql use $db + +sql select _wstart,_wend,count(*) from tm0 where ts >= '2020-01-01 01:03:06.000' and ts <= '2020-01-01 01:03:10.000' interval(1s) fill(value, NULL); + +if $rows != 5 then + return -1 +endi + +if $data02 != NULL then + return -1 +endi + +if $data12 != 1 then + return -1 +endi + +if $data22 != 1 then + return -1 +endi + +if $data32 != 1 then + return -1 +endi + +if $data42 != NULL then + return -1 +endi + +sql select _wstart,_wend,count(*),sum(k),avg(k) from tm0 where ts >= '2020-01-01 01:03:06.000' and ts <= '2020-01-01 01:03:10.000' interval(1s) fill(value, 1, NULL, 1); + +if $rows != 5 then + return -1 +endi + 
+if $data02 != 1 then + return -1 +endi + +if $data12 != 1 then + return -1 +endi + +if $data22 != 1 then + return -1 +endi + +if $data32 != 1 then + return -1 +endi + +if $data42 != 1 then + return -1 +endi + + +if $data03 != NULL then + return -1 +endi + +if $data13 != 7 then + return -1 +endi + +if $data23 != 8 then + return -1 +endi + +if $data33 != 9 then + return -1 +endi + +if $data43 != NULL then + return -1 +endi + + +if $data04 != 1.000000000 then + return -1 +endi + +if $data14 != 7.000000000 then + return -1 +endi + +if $data24 != 8.000000000 then + return -1 +endi + +if $data34 != 9.000000000 then + return -1 +endi + +if $data44 != 1.000000000 then + return -1 +endi + + system sh/exec.sh -n dnode1 -s stop -x SIGINT + + diff --git a/tests/script/tsim/query/r/explain_tsorder.result b/tests/script/tsim/query/r/explain_tsorder.result index b69a77ada52d5f6c819a5edb5a292a027d9f320e..25f1241ffd9479408a327fc0aced053e43ffa5cb 100644 --- a/tests/script/tsim/query/r/explain_tsorder.result +++ b/tests/script/tsim/query/r/explain_tsorder.result @@ -2798,3 +2798,1163 @@ taos> select last(ts) as ts, c2 as d from d1 group by c2 order by ts desc, c2 as ======================================== 2022-05-15 00:01:08.000 | 234 | +taos> explain verbose true select _wstart, _wend, count(*) from meters event_window start with c2 > 0 end with c2 < 100\G; +*************************** 1.row *************************** +QUERY_PLAN: -> Event (functions=3 width=24) +*************************** 2.row *************************** +QUERY_PLAN: Start Cond: (`test`.`meters`.`c2` > 0) +*************************** 3.row *************************** +QUERY_PLAN: End Cond: (`test`.`meters`.`c2` < 100) +*************************** 4.row *************************** +QUERY_PLAN: -> SortMerge (columns=2 width=12 input_order=unknown output_order=unknown) +*************************** 5.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 6.row 
*************************** +QUERY_PLAN: Output: Ignore Group Id: false +*************************** 7.row *************************** +QUERY_PLAN: Merge Key: _group_id asc, ts asc +*************************** 8.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=12) +*************************** 9.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 10.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=2 width=12 order=[asc|1 desc|0]) +*************************** 11.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 12.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] +*************************** 13.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=12) +*************************** 14.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 15.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=2 width=12 order=[asc|1 desc|0]) +*************************** 16.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 17.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] + +taos> explain verbose true select _wstart, _wend, count(*) from meters event_window start with c2 > 0 end with c2 < 100 order by _wstart desc\G; +*************************** 1.row *************************** +QUERY_PLAN: -> Sort input_order=asc output_order=desc (columns=3 width=24) +*************************** 2.row *************************** +QUERY_PLAN: Output: columns=3 width=24 +*************************** 3.row *************************** +QUERY_PLAN: -> Event (functions=3 width=24) +*************************** 4.row *************************** +QUERY_PLAN: Start Cond: (`test`.`meters`.`c2` > 0) 
+*************************** 5.row *************************** +QUERY_PLAN: End Cond: (`test`.`meters`.`c2` < 100) +*************************** 6.row *************************** +QUERY_PLAN: -> SortMerge (columns=2 width=12 input_order=unknown output_order=unknown) +*************************** 7.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 8.row *************************** +QUERY_PLAN: Output: Ignore Group Id: false +*************************** 9.row *************************** +QUERY_PLAN: Merge Key: _group_id asc, ts asc +*************************** 10.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=12) +*************************** 11.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 12.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=2 width=12 order=[asc|1 desc|0]) +*************************** 13.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 14.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] +*************************** 15.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=12) +*************************** 16.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 17.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=2 width=12 order=[asc|1 desc|0]) +*************************** 18.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 19.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] + +taos> explain verbose true select _wstart, _wend, count(*) from meters event_window start with c2 > 0 end with c2 < 100 order by _wstart asc\G; +*************************** 1.row *************************** 
+QUERY_PLAN: -> Event (functions=3 width=24) +*************************** 2.row *************************** +QUERY_PLAN: Start Cond: (`test`.`meters`.`c2` > 0) +*************************** 3.row *************************** +QUERY_PLAN: End Cond: (`test`.`meters`.`c2` < 100) +*************************** 4.row *************************** +QUERY_PLAN: -> SortMerge (columns=2 width=12 input_order=unknown output_order=unknown) +*************************** 5.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 6.row *************************** +QUERY_PLAN: Output: Ignore Group Id: false +*************************** 7.row *************************** +QUERY_PLAN: Merge Key: _group_id asc, ts asc +*************************** 8.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=12) +*************************** 9.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 10.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=2 width=12 order=[asc|1 desc|0]) +*************************** 11.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 12.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] +*************************** 13.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=12) +*************************** 14.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 15.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=2 width=12 order=[asc|1 desc|0]) +*************************** 16.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 17.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] + +taos> explain verbose true select _wstart, _wend, 
count(*) from meters event_window start with c2 > 0 end with c2 < 100 order by _wend desc\G; +*************************** 1.row *************************** +QUERY_PLAN: -> Sort input_order=asc output_order=desc (columns=3 width=24) +*************************** 2.row *************************** +QUERY_PLAN: Output: columns=3 width=24 +*************************** 3.row *************************** +QUERY_PLAN: -> Event (functions=3 width=24) +*************************** 4.row *************************** +QUERY_PLAN: Start Cond: (`test`.`meters`.`c2` > 0) +*************************** 5.row *************************** +QUERY_PLAN: End Cond: (`test`.`meters`.`c2` < 100) +*************************** 6.row *************************** +QUERY_PLAN: -> SortMerge (columns=2 width=12 input_order=unknown output_order=unknown) +*************************** 7.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 8.row *************************** +QUERY_PLAN: Output: Ignore Group Id: false +*************************** 9.row *************************** +QUERY_PLAN: Merge Key: _group_id asc, ts asc +*************************** 10.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=12) +*************************** 11.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 12.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=2 width=12 order=[asc|1 desc|0]) +*************************** 13.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 14.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] +*************************** 15.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=12) +*************************** 16.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 17.row 
*************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=2 width=12 order=[asc|1 desc|0]) +*************************** 18.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 19.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] + +taos> explain verbose true select _wstart, _wend, count(*) from meters event_window start with c2 > 0 end with c2 < 100 order by _wend asc\G; +*************************** 1.row *************************** +QUERY_PLAN: -> Event (functions=3 width=24) +*************************** 2.row *************************** +QUERY_PLAN: Start Cond: (`test`.`meters`.`c2` > 0) +*************************** 3.row *************************** +QUERY_PLAN: End Cond: (`test`.`meters`.`c2` < 100) +*************************** 4.row *************************** +QUERY_PLAN: -> SortMerge (columns=2 width=12 input_order=unknown output_order=unknown) +*************************** 5.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 6.row *************************** +QUERY_PLAN: Output: Ignore Group Id: false +*************************** 7.row *************************** +QUERY_PLAN: Merge Key: _group_id asc, ts asc +*************************** 8.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=12) +*************************** 9.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 10.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=2 width=12 order=[asc|1 desc|0]) +*************************** 11.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 12.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] +*************************** 13.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 
(width=12) +*************************** 14.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 15.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=2 width=12 order=[asc|1 desc|0]) +*************************** 16.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 17.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] + +taos> select _wstart, _wend, count(*) from meters event_window start with c2 > 0 end with c2 < 100; + _wstart | _wend | count(*) | +============================================================================ + 2022-05-15 00:01:08.000 | 2022-05-17 00:01:08.000 | 5 | + 2022-05-17 00:01:08.000 | 2022-05-17 00:01:08.000 | 1 | + 2022-05-18 00:01:08.000 | 2022-05-18 00:01:08.000 | 1 | + 2022-05-18 00:01:08.000 | 2022-05-18 00:01:08.000 | 1 | + 2022-05-19 00:01:08.000 | 2022-05-21 00:01:08.000 | 5 | + 2022-05-21 00:01:08.000 | 2022-05-21 00:01:08.000 | 1 | + +taos> select _wstart, _wend, count(*) from meters event_window start with c2 > 0 end with c2 < 100 order by _wstart desc; + _wstart | _wend | count(*) | +============================================================================ + 2022-05-21 00:01:08.000 | 2022-05-21 00:01:08.000 | 1 | + 2022-05-19 00:01:08.000 | 2022-05-21 00:01:08.000 | 5 | + 2022-05-18 00:01:08.000 | 2022-05-18 00:01:08.000 | 1 | + 2022-05-18 00:01:08.000 | 2022-05-18 00:01:08.000 | 1 | + 2022-05-17 00:01:08.000 | 2022-05-17 00:01:08.000 | 1 | + 2022-05-15 00:01:08.000 | 2022-05-17 00:01:08.000 | 5 | + +taos> select _wstart, _wend, count(*) from meters event_window start with c2 > 0 end with c2 < 100 order by _wstart asc; + _wstart | _wend | count(*) | +============================================================================ + 2022-05-15 00:01:08.000 | 2022-05-17 00:01:08.000 | 5 | + 2022-05-17 00:01:08.000 | 2022-05-17 00:01:08.000 | 1 | + 
2022-05-18 00:01:08.000 | 2022-05-18 00:01:08.000 | 1 | + 2022-05-18 00:01:08.000 | 2022-05-18 00:01:08.000 | 1 | + 2022-05-19 00:01:08.000 | 2022-05-21 00:01:08.000 | 5 | + 2022-05-21 00:01:08.000 | 2022-05-21 00:01:08.000 | 1 | + +taos> select _wstart, _wend, count(*) from meters event_window start with c2 > 0 end with c2 < 100 order by _wend desc; + _wstart | _wend | count(*) | +============================================================================ + 2022-05-19 00:01:08.000 | 2022-05-21 00:01:08.000 | 5 | + 2022-05-21 00:01:08.000 | 2022-05-21 00:01:08.000 | 1 | + 2022-05-18 00:01:08.000 | 2022-05-18 00:01:08.000 | 1 | + 2022-05-18 00:01:08.000 | 2022-05-18 00:01:08.000 | 1 | + 2022-05-15 00:01:08.000 | 2022-05-17 00:01:08.000 | 5 | + 2022-05-17 00:01:08.000 | 2022-05-17 00:01:08.000 | 1 | + +taos> select _wstart, _wend, count(*) from meters event_window start with c2 > 0 end with c2 < 100 order by _wend asc; + _wstart | _wend | count(*) | +============================================================================ + 2022-05-15 00:01:08.000 | 2022-05-17 00:01:08.000 | 5 | + 2022-05-17 00:01:08.000 | 2022-05-17 00:01:08.000 | 1 | + 2022-05-18 00:01:08.000 | 2022-05-18 00:01:08.000 | 1 | + 2022-05-18 00:01:08.000 | 2022-05-18 00:01:08.000 | 1 | + 2022-05-19 00:01:08.000 | 2022-05-21 00:01:08.000 | 5 | + 2022-05-21 00:01:08.000 | 2022-05-21 00:01:08.000 | 1 | + +taos> explain verbose true select _wstart, _wend, count(*) from meters session(ts, 1h)\G; +*************************** 1.row *************************** +QUERY_PLAN: -> Session (functions=3 width=24) +*************************** 2.row *************************** +QUERY_PLAN: Output: columns=3 width=24 +*************************** 3.row *************************** +QUERY_PLAN: Window: gap=3600000 +*************************** 4.row *************************** +QUERY_PLAN: -> SortMerge (columns=1 width=8 input_order=unknown output_order=unknown) +*************************** 5.row 
*************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 6.row *************************** +QUERY_PLAN: Output: Ignore Group Id: false +*************************** 7.row *************************** +QUERY_PLAN: Merge Key: _group_id asc, ts asc +*************************** 8.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=8) +*************************** 9.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 10.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=1 width=8 order=[asc|1 desc|0]) +*************************** 11.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 12.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] +*************************** 13.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=8) +*************************** 14.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 15.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=1 width=8 order=[asc|1 desc|0]) +*************************** 16.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 17.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] + +taos> explain verbose true select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wstart desc\G; +*************************** 1.row *************************** +QUERY_PLAN: -> Sort input_order=asc output_order=desc (columns=3 width=24) +*************************** 2.row *************************** +QUERY_PLAN: Output: columns=3 width=24 +*************************** 3.row *************************** +QUERY_PLAN: -> Session (functions=3 width=24) +*************************** 4.row 
*************************** +QUERY_PLAN: Output: columns=3 width=24 +*************************** 5.row *************************** +QUERY_PLAN: Window: gap=3600000 +*************************** 6.row *************************** +QUERY_PLAN: -> SortMerge (columns=1 width=8 input_order=unknown output_order=unknown) +*************************** 7.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 8.row *************************** +QUERY_PLAN: Output: Ignore Group Id: false +*************************** 9.row *************************** +QUERY_PLAN: Merge Key: _group_id asc, ts asc +*************************** 10.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=8) +*************************** 11.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 12.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=1 width=8 order=[asc|1 desc|0]) +*************************** 13.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 14.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] +*************************** 15.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=8) +*************************** 16.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 17.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=1 width=8 order=[asc|1 desc|0]) +*************************** 18.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 19.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] + +taos> explain verbose true select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wstart asc\G; +*************************** 1.row *************************** 
+QUERY_PLAN: -> Session (functions=3 width=24) +*************************** 2.row *************************** +QUERY_PLAN: Output: columns=3 width=24 +*************************** 3.row *************************** +QUERY_PLAN: Window: gap=3600000 +*************************** 4.row *************************** +QUERY_PLAN: -> SortMerge (columns=1 width=8 input_order=unknown output_order=unknown) +*************************** 5.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 6.row *************************** +QUERY_PLAN: Output: Ignore Group Id: false +*************************** 7.row *************************** +QUERY_PLAN: Merge Key: _group_id asc, ts asc +*************************** 8.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=8) +*************************** 9.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 10.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=1 width=8 order=[asc|1 desc|0]) +*************************** 11.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 12.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] +*************************** 13.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=8) +*************************** 14.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 15.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=1 width=8 order=[asc|1 desc|0]) +*************************** 16.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 17.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] + +taos> explain verbose true select _wstart, _wend, count(*) from meters session(ts, 1h) order 
by _wend desc\G; +*************************** 1.row *************************** +QUERY_PLAN: -> Sort input_order=asc output_order=desc (columns=3 width=24) +*************************** 2.row *************************** +QUERY_PLAN: Output: columns=3 width=24 +*************************** 3.row *************************** +QUERY_PLAN: -> Session (functions=3 width=24) +*************************** 4.row *************************** +QUERY_PLAN: Output: columns=3 width=24 +*************************** 5.row *************************** +QUERY_PLAN: Window: gap=3600000 +*************************** 6.row *************************** +QUERY_PLAN: -> SortMerge (columns=1 width=8 input_order=unknown output_order=unknown) +*************************** 7.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 8.row *************************** +QUERY_PLAN: Output: Ignore Group Id: false +*************************** 9.row *************************** +QUERY_PLAN: Merge Key: _group_id asc, ts asc +*************************** 10.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=8) +*************************** 11.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 12.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=1 width=8 order=[asc|1 desc|0]) +*************************** 13.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 14.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] +*************************** 15.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=8) +*************************** 16.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 17.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=1 width=8 order=[asc|1 desc|0]) 
+*************************** 18.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 19.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] + +taos> explain verbose true select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wend asc\G; +*************************** 1.row *************************** +QUERY_PLAN: -> Session (functions=3 width=24) +*************************** 2.row *************************** +QUERY_PLAN: Output: columns=3 width=24 +*************************** 3.row *************************** +QUERY_PLAN: Window: gap=3600000 +*************************** 4.row *************************** +QUERY_PLAN: -> SortMerge (columns=1 width=8 input_order=unknown output_order=unknown) +*************************** 5.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 6.row *************************** +QUERY_PLAN: Output: Ignore Group Id: false +*************************** 7.row *************************** +QUERY_PLAN: Merge Key: _group_id asc, ts asc +*************************** 8.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=8) +*************************** 9.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 10.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=1 width=8 order=[asc|1 desc|0]) +*************************** 11.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 12.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] +*************************** 13.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=8) +*************************** 14.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 15.row *************************** 
+QUERY_PLAN: -> Table Merge Scan on meters (columns=1 width=8 order=[asc|1 desc|0]) +*************************** 16.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 17.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] + +taos> select _wstart, _wend, count(*) from meters session(ts, 1h); + _wstart | _wend | count(*) | +============================================================================ + 2022-05-15 00:01:08.000 | 2022-05-15 00:01:08.000 | 2 | + 2022-05-16 00:01:08.000 | 2022-05-16 00:01:08.000 | 2 | + 2022-05-17 00:01:08.000 | 2022-05-17 00:01:08.000 | 2 | + 2022-05-18 00:01:08.000 | 2022-05-18 00:01:08.000 | 2 | + 2022-05-19 00:01:08.000 | 2022-05-19 00:01:08.000 | 2 | + 2022-05-20 00:01:08.000 | 2022-05-20 00:01:08.000 | 2 | + 2022-05-21 00:01:08.000 | 2022-05-21 00:01:08.000 | 2 | + 2022-05-22 00:01:08.000 | 2022-05-22 00:01:08.000 | 2 | + 2022-05-23 00:01:08.000 | 2022-05-23 00:01:08.000 | 2 | + 2022-05-24 00:01:08.000 | 2022-05-24 00:01:08.000 | 2 | + +taos> select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wstart desc; + _wstart | _wend | count(*) | +============================================================================ + 2022-05-24 00:01:08.000 | 2022-05-24 00:01:08.000 | 2 | + 2022-05-23 00:01:08.000 | 2022-05-23 00:01:08.000 | 2 | + 2022-05-22 00:01:08.000 | 2022-05-22 00:01:08.000 | 2 | + 2022-05-21 00:01:08.000 | 2022-05-21 00:01:08.000 | 2 | + 2022-05-20 00:01:08.000 | 2022-05-20 00:01:08.000 | 2 | + 2022-05-19 00:01:08.000 | 2022-05-19 00:01:08.000 | 2 | + 2022-05-18 00:01:08.000 | 2022-05-18 00:01:08.000 | 2 | + 2022-05-17 00:01:08.000 | 2022-05-17 00:01:08.000 | 2 | + 2022-05-16 00:01:08.000 | 2022-05-16 00:01:08.000 | 2 | + 2022-05-15 00:01:08.000 | 2022-05-15 00:01:08.000 | 2 | + +taos> select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wstart asc; + _wstart | _wend | count(*) | 
+============================================================================ + 2022-05-15 00:01:08.000 | 2022-05-15 00:01:08.000 | 2 | + 2022-05-16 00:01:08.000 | 2022-05-16 00:01:08.000 | 2 | + 2022-05-17 00:01:08.000 | 2022-05-17 00:01:08.000 | 2 | + 2022-05-18 00:01:08.000 | 2022-05-18 00:01:08.000 | 2 | + 2022-05-19 00:01:08.000 | 2022-05-19 00:01:08.000 | 2 | + 2022-05-20 00:01:08.000 | 2022-05-20 00:01:08.000 | 2 | + 2022-05-21 00:01:08.000 | 2022-05-21 00:01:08.000 | 2 | + 2022-05-22 00:01:08.000 | 2022-05-22 00:01:08.000 | 2 | + 2022-05-23 00:01:08.000 | 2022-05-23 00:01:08.000 | 2 | + 2022-05-24 00:01:08.000 | 2022-05-24 00:01:08.000 | 2 | + +taos> select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wend desc; + _wstart | _wend | count(*) | +============================================================================ + 2022-05-24 00:01:08.000 | 2022-05-24 00:01:08.000 | 2 | + 2022-05-23 00:01:08.000 | 2022-05-23 00:01:08.000 | 2 | + 2022-05-22 00:01:08.000 | 2022-05-22 00:01:08.000 | 2 | + 2022-05-21 00:01:08.000 | 2022-05-21 00:01:08.000 | 2 | + 2022-05-20 00:01:08.000 | 2022-05-20 00:01:08.000 | 2 | + 2022-05-19 00:01:08.000 | 2022-05-19 00:01:08.000 | 2 | + 2022-05-18 00:01:08.000 | 2022-05-18 00:01:08.000 | 2 | + 2022-05-17 00:01:08.000 | 2022-05-17 00:01:08.000 | 2 | + 2022-05-16 00:01:08.000 | 2022-05-16 00:01:08.000 | 2 | + 2022-05-15 00:01:08.000 | 2022-05-15 00:01:08.000 | 2 | + +taos> select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wend asc; + _wstart | _wend | count(*) | +============================================================================ + 2022-05-15 00:01:08.000 | 2022-05-15 00:01:08.000 | 2 | + 2022-05-16 00:01:08.000 | 2022-05-16 00:01:08.000 | 2 | + 2022-05-17 00:01:08.000 | 2022-05-17 00:01:08.000 | 2 | + 2022-05-18 00:01:08.000 | 2022-05-18 00:01:08.000 | 2 | + 2022-05-19 00:01:08.000 | 2022-05-19 00:01:08.000 | 2 | + 2022-05-20 00:01:08.000 | 2022-05-20 00:01:08.000 | 2 | + 2022-05-21 
00:01:08.000 | 2022-05-21 00:01:08.000 | 2 | + 2022-05-22 00:01:08.000 | 2022-05-22 00:01:08.000 | 2 | + 2022-05-23 00:01:08.000 | 2022-05-23 00:01:08.000 | 2 | + 2022-05-24 00:01:08.000 | 2022-05-24 00:01:08.000 | 2 | + +taos> explain verbose true select _wstart, _wend, count(*) from meters session(ts, 1h)\G; +*************************** 1.row *************************** +QUERY_PLAN: -> Session (functions=3 width=24) +*************************** 2.row *************************** +QUERY_PLAN: Output: columns=3 width=24 +*************************** 3.row *************************** +QUERY_PLAN: Window: gap=3600000 +*************************** 4.row *************************** +QUERY_PLAN: -> SortMerge (columns=1 width=8 input_order=unknown output_order=unknown) +*************************** 5.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 6.row *************************** +QUERY_PLAN: Output: Ignore Group Id: false +*************************** 7.row *************************** +QUERY_PLAN: Merge Key: _group_id asc, ts asc +*************************** 8.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=8) +*************************** 9.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 10.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=1 width=8 order=[asc|1 desc|0]) +*************************** 11.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 12.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] +*************************** 13.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=8) +*************************** 14.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 15.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters 
(columns=1 width=8 order=[asc|1 desc|0]) +*************************** 16.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 17.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] + +taos> explain verbose true select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wstart desc\G; +*************************** 1.row *************************** +QUERY_PLAN: -> Sort input_order=asc output_order=desc (columns=3 width=24) +*************************** 2.row *************************** +QUERY_PLAN: Output: columns=3 width=24 +*************************** 3.row *************************** +QUERY_PLAN: -> Session (functions=3 width=24) +*************************** 4.row *************************** +QUERY_PLAN: Output: columns=3 width=24 +*************************** 5.row *************************** +QUERY_PLAN: Window: gap=3600000 +*************************** 6.row *************************** +QUERY_PLAN: -> SortMerge (columns=1 width=8 input_order=unknown output_order=unknown) +*************************** 7.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 8.row *************************** +QUERY_PLAN: Output: Ignore Group Id: false +*************************** 9.row *************************** +QUERY_PLAN: Merge Key: _group_id asc, ts asc +*************************** 10.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=8) +*************************** 11.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 12.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=1 width=8 order=[asc|1 desc|0]) +*************************** 13.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 14.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 
9223372036854775807] +*************************** 15.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=8) +*************************** 16.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 17.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=1 width=8 order=[asc|1 desc|0]) +*************************** 18.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 19.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] + +taos> explain verbose true select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wstart asc\G; +*************************** 1.row *************************** +QUERY_PLAN: -> Session (functions=3 width=24) +*************************** 2.row *************************** +QUERY_PLAN: Output: columns=3 width=24 +*************************** 3.row *************************** +QUERY_PLAN: Window: gap=3600000 +*************************** 4.row *************************** +QUERY_PLAN: -> SortMerge (columns=1 width=8 input_order=unknown output_order=unknown) +*************************** 5.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 6.row *************************** +QUERY_PLAN: Output: Ignore Group Id: false +*************************** 7.row *************************** +QUERY_PLAN: Merge Key: _group_id asc, ts asc +*************************** 8.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=8) +*************************** 9.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 10.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=1 width=8 order=[asc|1 desc|0]) +*************************** 11.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 
12.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] +*************************** 13.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=8) +*************************** 14.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 15.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=1 width=8 order=[asc|1 desc|0]) +*************************** 16.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 17.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] + +taos> explain verbose true select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wend desc\G; +*************************** 1.row *************************** +QUERY_PLAN: -> Sort input_order=asc output_order=desc (columns=3 width=24) +*************************** 2.row *************************** +QUERY_PLAN: Output: columns=3 width=24 +*************************** 3.row *************************** +QUERY_PLAN: -> Session (functions=3 width=24) +*************************** 4.row *************************** +QUERY_PLAN: Output: columns=3 width=24 +*************************** 5.row *************************** +QUERY_PLAN: Window: gap=3600000 +*************************** 6.row *************************** +QUERY_PLAN: -> SortMerge (columns=1 width=8 input_order=unknown output_order=unknown) +*************************** 7.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 8.row *************************** +QUERY_PLAN: Output: Ignore Group Id: false +*************************** 9.row *************************** +QUERY_PLAN: Merge Key: _group_id asc, ts asc +*************************** 10.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=8) +*************************** 11.row 
*************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 12.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=1 width=8 order=[asc|1 desc|0]) +*************************** 13.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 14.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] +*************************** 15.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=8) +*************************** 16.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 17.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=1 width=8 order=[asc|1 desc|0]) +*************************** 18.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 19.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] + +taos> explain verbose true select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wend asc\G; +*************************** 1.row *************************** +QUERY_PLAN: -> Session (functions=3 width=24) +*************************** 2.row *************************** +QUERY_PLAN: Output: columns=3 width=24 +*************************** 3.row *************************** +QUERY_PLAN: Window: gap=3600000 +*************************** 4.row *************************** +QUERY_PLAN: -> SortMerge (columns=1 width=8 input_order=unknown output_order=unknown) +*************************** 5.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 6.row *************************** +QUERY_PLAN: Output: Ignore Group Id: false +*************************** 7.row *************************** +QUERY_PLAN: Merge Key: _group_id asc, ts asc +*************************** 8.row *************************** 
+QUERY_PLAN: -> Data Exchange 1:1 (width=8) +*************************** 9.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 10.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=1 width=8 order=[asc|1 desc|0]) +*************************** 11.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 12.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] +*************************** 13.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=8) +*************************** 14.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 15.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=1 width=8 order=[asc|1 desc|0]) +*************************** 16.row *************************** +QUERY_PLAN: Output: columns=1 width=8 +*************************** 17.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] + +taos> select _wstart, _wend, count(*) from meters session(ts, 1h); + _wstart | _wend | count(*) | +============================================================================ + 2022-05-15 00:01:08.000 | 2022-05-15 00:01:08.000 | 2 | + 2022-05-16 00:01:08.000 | 2022-05-16 00:01:08.000 | 2 | + 2022-05-17 00:01:08.000 | 2022-05-17 00:01:08.000 | 2 | + 2022-05-18 00:01:08.000 | 2022-05-18 00:01:08.000 | 2 | + 2022-05-19 00:01:08.000 | 2022-05-19 00:01:08.000 | 2 | + 2022-05-20 00:01:08.000 | 2022-05-20 00:01:08.000 | 2 | + 2022-05-21 00:01:08.000 | 2022-05-21 00:01:08.000 | 2 | + 2022-05-22 00:01:08.000 | 2022-05-22 00:01:08.000 | 2 | + 2022-05-23 00:01:08.000 | 2022-05-23 00:01:08.000 | 2 | + 2022-05-24 00:01:08.000 | 2022-05-24 00:01:08.000 | 2 | + +taos> select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wstart desc; + _wstart | _wend | 
count(*) | +============================================================================ + 2022-05-24 00:01:08.000 | 2022-05-24 00:01:08.000 | 2 | + 2022-05-23 00:01:08.000 | 2022-05-23 00:01:08.000 | 2 | + 2022-05-22 00:01:08.000 | 2022-05-22 00:01:08.000 | 2 | + 2022-05-21 00:01:08.000 | 2022-05-21 00:01:08.000 | 2 | + 2022-05-20 00:01:08.000 | 2022-05-20 00:01:08.000 | 2 | + 2022-05-19 00:01:08.000 | 2022-05-19 00:01:08.000 | 2 | + 2022-05-18 00:01:08.000 | 2022-05-18 00:01:08.000 | 2 | + 2022-05-17 00:01:08.000 | 2022-05-17 00:01:08.000 | 2 | + 2022-05-16 00:01:08.000 | 2022-05-16 00:01:08.000 | 2 | + 2022-05-15 00:01:08.000 | 2022-05-15 00:01:08.000 | 2 | + +taos> select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wstart asc; + _wstart | _wend | count(*) | +============================================================================ + 2022-05-15 00:01:08.000 | 2022-05-15 00:01:08.000 | 2 | + 2022-05-16 00:01:08.000 | 2022-05-16 00:01:08.000 | 2 | + 2022-05-17 00:01:08.000 | 2022-05-17 00:01:08.000 | 2 | + 2022-05-18 00:01:08.000 | 2022-05-18 00:01:08.000 | 2 | + 2022-05-19 00:01:08.000 | 2022-05-19 00:01:08.000 | 2 | + 2022-05-20 00:01:08.000 | 2022-05-20 00:01:08.000 | 2 | + 2022-05-21 00:01:08.000 | 2022-05-21 00:01:08.000 | 2 | + 2022-05-22 00:01:08.000 | 2022-05-22 00:01:08.000 | 2 | + 2022-05-23 00:01:08.000 | 2022-05-23 00:01:08.000 | 2 | + 2022-05-24 00:01:08.000 | 2022-05-24 00:01:08.000 | 2 | + +taos> select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wend desc; + _wstart | _wend | count(*) | +============================================================================ + 2022-05-24 00:01:08.000 | 2022-05-24 00:01:08.000 | 2 | + 2022-05-23 00:01:08.000 | 2022-05-23 00:01:08.000 | 2 | + 2022-05-22 00:01:08.000 | 2022-05-22 00:01:08.000 | 2 | + 2022-05-21 00:01:08.000 | 2022-05-21 00:01:08.000 | 2 | + 2022-05-20 00:01:08.000 | 2022-05-20 00:01:08.000 | 2 | + 2022-05-19 00:01:08.000 | 2022-05-19 00:01:08.000 | 2 | + 
2022-05-18 00:01:08.000 | 2022-05-18 00:01:08.000 | 2 | + 2022-05-17 00:01:08.000 | 2022-05-17 00:01:08.000 | 2 | + 2022-05-16 00:01:08.000 | 2022-05-16 00:01:08.000 | 2 | + 2022-05-15 00:01:08.000 | 2022-05-15 00:01:08.000 | 2 | + +taos> select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wend asc; + _wstart | _wend | count(*) | +============================================================================ + 2022-05-15 00:01:08.000 | 2022-05-15 00:01:08.000 | 2 | + 2022-05-16 00:01:08.000 | 2022-05-16 00:01:08.000 | 2 | + 2022-05-17 00:01:08.000 | 2022-05-17 00:01:08.000 | 2 | + 2022-05-18 00:01:08.000 | 2022-05-18 00:01:08.000 | 2 | + 2022-05-19 00:01:08.000 | 2022-05-19 00:01:08.000 | 2 | + 2022-05-20 00:01:08.000 | 2022-05-20 00:01:08.000 | 2 | + 2022-05-21 00:01:08.000 | 2022-05-21 00:01:08.000 | 2 | + 2022-05-22 00:01:08.000 | 2022-05-22 00:01:08.000 | 2 | + 2022-05-23 00:01:08.000 | 2022-05-23 00:01:08.000 | 2 | + 2022-05-24 00:01:08.000 | 2022-05-24 00:01:08.000 | 2 | + +taos> explain verbose true select _wstart, _wend, count(*), last(ts) from meters state_window(c2)\G; +*************************** 1.row *************************** +QUERY_PLAN: -> StateWindow on Column c2 (functions=4 width=36) +*************************** 2.row *************************** +QUERY_PLAN: Output: columns=4 width=32 +*************************** 3.row *************************** +QUERY_PLAN: Output: columns=4 width=32 +*************************** 4.row *************************** +QUERY_PLAN: -> SortMerge (columns=2 width=12 input_order=unknown output_order=unknown) +*************************** 5.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 6.row *************************** +QUERY_PLAN: Output: Ignore Group Id: false +*************************** 7.row *************************** +QUERY_PLAN: Merge Key: _group_id asc, ts asc +*************************** 8.row *************************** +QUERY_PLAN: -> Data 
Exchange 1:1 (width=12) +*************************** 9.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 10.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=2 width=12 order=[asc|1 desc|0]) +*************************** 11.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 12.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] +*************************** 13.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=12) +*************************** 14.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 15.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=2 width=12 order=[asc|1 desc|0]) +*************************** 16.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 17.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] + +taos> explain verbose true select _wstart, _wend, count(*), last(ts) from meters state_window(c2) order by _wstart desc\G; +*************************** 1.row *************************** +QUERY_PLAN: -> Sort input_order=asc output_order=desc (columns=4 width=32) +*************************** 2.row *************************** +QUERY_PLAN: Output: columns=4 width=32 +*************************** 3.row *************************** +QUERY_PLAN: -> StateWindow on Column c2 (functions=4 width=36) +*************************** 4.row *************************** +QUERY_PLAN: Output: columns=4 width=32 +*************************** 5.row *************************** +QUERY_PLAN: Output: columns=4 width=32 +*************************** 6.row *************************** +QUERY_PLAN: -> SortMerge (columns=2 width=12 input_order=unknown output_order=unknown) +*************************** 7.row 
*************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 8.row *************************** +QUERY_PLAN: Output: Ignore Group Id: false +*************************** 9.row *************************** +QUERY_PLAN: Merge Key: _group_id asc, ts asc +*************************** 10.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=12) +*************************** 11.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 12.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=2 width=12 order=[asc|1 desc|0]) +*************************** 13.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 14.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] +*************************** 15.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=12) +*************************** 16.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 17.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=2 width=12 order=[asc|1 desc|0]) +*************************** 18.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 19.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] + +taos> explain verbose true select _wstart, _wend, count(*), last(ts) from meters state_window(c2) order by _wstart asc\G; +*************************** 1.row *************************** +QUERY_PLAN: -> StateWindow on Column c2 (functions=4 width=36) +*************************** 2.row *************************** +QUERY_PLAN: Output: columns=4 width=32 +*************************** 3.row *************************** +QUERY_PLAN: Output: columns=4 width=32 +*************************** 4.row 
*************************** +QUERY_PLAN: -> SortMerge (columns=2 width=12 input_order=unknown output_order=unknown) +*************************** 5.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 6.row *************************** +QUERY_PLAN: Output: Ignore Group Id: false +*************************** 7.row *************************** +QUERY_PLAN: Merge Key: _group_id asc, ts asc +*************************** 8.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=12) +*************************** 9.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 10.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=2 width=12 order=[asc|1 desc|0]) +*************************** 11.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 12.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] +*************************** 13.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=12) +*************************** 14.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 15.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=2 width=12 order=[asc|1 desc|0]) +*************************** 16.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 17.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] + +taos> explain verbose true select _wstart, _wend, count(*), last(ts) from meters state_window(c2) order by _wend desc\G; +*************************** 1.row *************************** +QUERY_PLAN: -> Sort input_order=asc output_order=desc (columns=4 width=32) +*************************** 2.row *************************** +QUERY_PLAN: Output: columns=4 width=32 
+*************************** 3.row *************************** +QUERY_PLAN: -> StateWindow on Column c2 (functions=4 width=36) +*************************** 4.row *************************** +QUERY_PLAN: Output: columns=4 width=32 +*************************** 5.row *************************** +QUERY_PLAN: Output: columns=4 width=32 +*************************** 6.row *************************** +QUERY_PLAN: -> SortMerge (columns=2 width=12 input_order=unknown output_order=unknown) +*************************** 7.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 8.row *************************** +QUERY_PLAN: Output: Ignore Group Id: false +*************************** 9.row *************************** +QUERY_PLAN: Merge Key: _group_id asc, ts asc +*************************** 10.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=12) +*************************** 11.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 12.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=2 width=12 order=[asc|1 desc|0]) +*************************** 13.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 14.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] +*************************** 15.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=12) +*************************** 16.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 17.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=2 width=12 order=[asc|1 desc|0]) +*************************** 18.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 19.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 
9223372036854775807] + +taos> explain verbose true select _wstart, _wend, count(*), last(ts) from meters state_window(c2) order by _wend asc\G; +*************************** 1.row *************************** +QUERY_PLAN: -> StateWindow on Column c2 (functions=4 width=36) +*************************** 2.row *************************** +QUERY_PLAN: Output: columns=4 width=32 +*************************** 3.row *************************** +QUERY_PLAN: Output: columns=4 width=32 +*************************** 4.row *************************** +QUERY_PLAN: -> SortMerge (columns=2 width=12 input_order=unknown output_order=unknown) +*************************** 5.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 6.row *************************** +QUERY_PLAN: Output: Ignore Group Id: false +*************************** 7.row *************************** +QUERY_PLAN: Merge Key: _group_id asc, ts asc +*************************** 8.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=12) +*************************** 9.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 10.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=2 width=12 order=[asc|1 desc|0]) +*************************** 11.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 12.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] +*************************** 13.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=12) +*************************** 14.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 15.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=2 width=12 order=[asc|1 desc|0]) +*************************** 16.row *************************** +QUERY_PLAN: Output: 
columns=2 width=12 +*************************** 17.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] + +taos> select _wstart, _wend, count(*), last(ts) from meters state_window(c2); + _wstart | _wend | count(*) | last(ts) | +====================================================================================================== + 2022-05-15 00:01:08.000 | 2022-05-15 00:01:08.000 | 2 | 2022-05-15 00:01:08.000 | + 2022-05-16 00:01:08.000 | 2022-05-16 00:01:08.000 | 2 | 2022-05-16 00:01:08.000 | + 2022-05-17 00:01:08.000 | 2022-05-17 00:01:08.000 | 2 | 2022-05-17 00:01:08.000 | + 2022-05-18 00:01:08.000 | 2022-05-18 00:01:08.000 | 2 | 2022-05-18 00:01:08.000 | + 2022-05-19 00:01:08.000 | 2022-05-19 00:01:08.000 | 2 | 2022-05-19 00:01:08.000 | + 2022-05-20 00:01:08.000 | 2022-05-20 00:01:08.000 | 2 | 2022-05-20 00:01:08.000 | + 2022-05-21 00:01:08.000 | 2022-05-21 00:01:08.000 | 2 | 2022-05-21 00:01:08.000 | + 2022-05-22 00:01:08.000 | 2022-05-22 00:01:08.000 | 2 | 2022-05-22 00:01:08.000 | + 2022-05-23 00:01:08.000 | 2022-05-23 00:01:08.000 | 2 | 2022-05-23 00:01:08.000 | + 2022-05-24 00:01:08.000 | 2022-05-24 00:01:08.000 | 2 | 2022-05-24 00:01:08.000 | + +taos> select _wstart, _wend, count(*), last(ts) from meters state_window(c2) order by _wstart desc; + _wstart | _wend | count(*) | last(ts) | +====================================================================================================== + 2022-05-24 00:01:08.000 | 2022-05-24 00:01:08.000 | 2 | 2022-05-24 00:01:08.000 | + 2022-05-23 00:01:08.000 | 2022-05-23 00:01:08.000 | 2 | 2022-05-23 00:01:08.000 | + 2022-05-22 00:01:08.000 | 2022-05-22 00:01:08.000 | 2 | 2022-05-22 00:01:08.000 | + 2022-05-21 00:01:08.000 | 2022-05-21 00:01:08.000 | 2 | 2022-05-21 00:01:08.000 | + 2022-05-20 00:01:08.000 | 2022-05-20 00:01:08.000 | 2 | 2022-05-20 00:01:08.000 | + 2022-05-19 00:01:08.000 | 2022-05-19 00:01:08.000 | 2 | 2022-05-19 00:01:08.000 | + 2022-05-18 00:01:08.000 | 
2022-05-18 00:01:08.000 | 2 | 2022-05-18 00:01:08.000 | + 2022-05-17 00:01:08.000 | 2022-05-17 00:01:08.000 | 2 | 2022-05-17 00:01:08.000 | + 2022-05-16 00:01:08.000 | 2022-05-16 00:01:08.000 | 2 | 2022-05-16 00:01:08.000 | + 2022-05-15 00:01:08.000 | 2022-05-15 00:01:08.000 | 2 | 2022-05-15 00:01:08.000 | + +taos> select _wstart, _wend, count(*), last(ts) from meters state_window(c2) order by _wstart asc; + _wstart | _wend | count(*) | last(ts) | +====================================================================================================== + 2022-05-15 00:01:08.000 | 2022-05-15 00:01:08.000 | 2 | 2022-05-15 00:01:08.000 | + 2022-05-16 00:01:08.000 | 2022-05-16 00:01:08.000 | 2 | 2022-05-16 00:01:08.000 | + 2022-05-17 00:01:08.000 | 2022-05-17 00:01:08.000 | 2 | 2022-05-17 00:01:08.000 | + 2022-05-18 00:01:08.000 | 2022-05-18 00:01:08.000 | 2 | 2022-05-18 00:01:08.000 | + 2022-05-19 00:01:08.000 | 2022-05-19 00:01:08.000 | 2 | 2022-05-19 00:01:08.000 | + 2022-05-20 00:01:08.000 | 2022-05-20 00:01:08.000 | 2 | 2022-05-20 00:01:08.000 | + 2022-05-21 00:01:08.000 | 2022-05-21 00:01:08.000 | 2 | 2022-05-21 00:01:08.000 | + 2022-05-22 00:01:08.000 | 2022-05-22 00:01:08.000 | 2 | 2022-05-22 00:01:08.000 | + 2022-05-23 00:01:08.000 | 2022-05-23 00:01:08.000 | 2 | 2022-05-23 00:01:08.000 | + 2022-05-24 00:01:08.000 | 2022-05-24 00:01:08.000 | 2 | 2022-05-24 00:01:08.000 | + +taos> select _wstart, _wend, count(*), last(ts) from meters state_window(c2) order by _wend desc; + _wstart | _wend | count(*) | last(ts) | +====================================================================================================== + 2022-05-24 00:01:08.000 | 2022-05-24 00:01:08.000 | 2 | 2022-05-24 00:01:08.000 | + 2022-05-23 00:01:08.000 | 2022-05-23 00:01:08.000 | 2 | 2022-05-23 00:01:08.000 | + 2022-05-22 00:01:08.000 | 2022-05-22 00:01:08.000 | 2 | 2022-05-22 00:01:08.000 | + 2022-05-21 00:01:08.000 | 2022-05-21 00:01:08.000 | 2 | 2022-05-21 00:01:08.000 | + 2022-05-20 
00:01:08.000 | 2022-05-20 00:01:08.000 | 2 | 2022-05-20 00:01:08.000 | + 2022-05-19 00:01:08.000 | 2022-05-19 00:01:08.000 | 2 | 2022-05-19 00:01:08.000 | + 2022-05-18 00:01:08.000 | 2022-05-18 00:01:08.000 | 2 | 2022-05-18 00:01:08.000 | + 2022-05-17 00:01:08.000 | 2022-05-17 00:01:08.000 | 2 | 2022-05-17 00:01:08.000 | + 2022-05-16 00:01:08.000 | 2022-05-16 00:01:08.000 | 2 | 2022-05-16 00:01:08.000 | + 2022-05-15 00:01:08.000 | 2022-05-15 00:01:08.000 | 2 | 2022-05-15 00:01:08.000 | + +taos> select _wstart, _wend, count(*), last(ts) from meters state_window(c2) order by _wend asc; + _wstart | _wend | count(*) | last(ts) | +====================================================================================================== + 2022-05-15 00:01:08.000 | 2022-05-15 00:01:08.000 | 2 | 2022-05-15 00:01:08.000 | + 2022-05-16 00:01:08.000 | 2022-05-16 00:01:08.000 | 2 | 2022-05-16 00:01:08.000 | + 2022-05-17 00:01:08.000 | 2022-05-17 00:01:08.000 | 2 | 2022-05-17 00:01:08.000 | + 2022-05-18 00:01:08.000 | 2022-05-18 00:01:08.000 | 2 | 2022-05-18 00:01:08.000 | + 2022-05-19 00:01:08.000 | 2022-05-19 00:01:08.000 | 2 | 2022-05-19 00:01:08.000 | + 2022-05-20 00:01:08.000 | 2022-05-20 00:01:08.000 | 2 | 2022-05-20 00:01:08.000 | + 2022-05-21 00:01:08.000 | 2022-05-21 00:01:08.000 | 2 | 2022-05-21 00:01:08.000 | + 2022-05-22 00:01:08.000 | 2022-05-22 00:01:08.000 | 2 | 2022-05-22 00:01:08.000 | + 2022-05-23 00:01:08.000 | 2022-05-23 00:01:08.000 | 2 | 2022-05-23 00:01:08.000 | + 2022-05-24 00:01:08.000 | 2022-05-24 00:01:08.000 | 2 | 2022-05-24 00:01:08.000 | + +taos> explain verbose true select _wstart, _wend, count(*), last(ts) from meters state_window(c2) order by _wend asc, count(*) desc\G; +*************************** 1.row *************************** +QUERY_PLAN: -> Sort input_order=asc output_order=asc (columns=4 width=32) +*************************** 2.row *************************** +QUERY_PLAN: Output: columns=4 width=32 +*************************** 3.row 
*************************** +QUERY_PLAN: -> StateWindow on Column c2 (functions=5 width=44) +*************************** 4.row *************************** +QUERY_PLAN: Output: columns=5 width=40 +*************************** 5.row *************************** +QUERY_PLAN: Output: columns=5 width=40 +*************************** 6.row *************************** +QUERY_PLAN: -> SortMerge (columns=2 width=12 input_order=unknown output_order=unknown) +*************************** 7.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 8.row *************************** +QUERY_PLAN: Output: Ignore Group Id: false +*************************** 9.row *************************** +QUERY_PLAN: Merge Key: _group_id asc, ts asc +*************************** 10.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=12) +*************************** 11.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 12.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=2 width=12 order=[asc|1 desc|0]) +*************************** 13.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 14.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] +*************************** 15.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=12) +*************************** 16.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 17.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=2 width=12 order=[asc|1 desc|0]) +*************************** 18.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 19.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] + +taos> explain verbose true 
select _wstart, _wend, last(ts) from (select _wstart as ts, _wend, count(*), last(ts) from meters state_window(c2) order by _wend desc) interval(1h) order by _wstart desc\G; +*************************** 1.row *************************** +QUERY_PLAN: -> Interval on Column ts (functions=3 width=24 input_order=desc output_order=desc ) +*************************** 2.row *************************** +QUERY_PLAN: Output: columns=3 width=24 +*************************** 3.row *************************** +QUERY_PLAN: Time Window: interval=1h offset=0a sliding=1h +*************************** 4.row *************************** +QUERY_PLAN: Merge ResBlocks: True +*************************** 5.row *************************** +QUERY_PLAN: -> Projection (columns=3 width=24 input_order=desc ) +*************************** 6.row *************************** +QUERY_PLAN: Output: columns=3 width=24 +*************************** 7.row *************************** +QUERY_PLAN: Output: Ignore Group Id: true +*************************** 8.row *************************** +QUERY_PLAN: Merge ResBlocks: True +*************************** 9.row *************************** +QUERY_PLAN: -> Sort input_order=asc output_order=desc (columns=3 width=24) +*************************** 10.row *************************** +QUERY_PLAN: Output: columns=3 width=24 +*************************** 11.row *************************** +QUERY_PLAN: -> StateWindow on Column c2 (functions=4 width=36) +*************************** 12.row *************************** +QUERY_PLAN: Output: columns=4 width=32 +*************************** 13.row *************************** +QUERY_PLAN: Output: columns=4 width=32 +*************************** 14.row *************************** +QUERY_PLAN: -> SortMerge (columns=2 width=12 input_order=unknown output_order=unknown) +*************************** 15.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 16.row *************************** 
+QUERY_PLAN: Output: Ignore Group Id: false +*************************** 17.row *************************** +QUERY_PLAN: Merge Key: _group_id asc, ts asc +*************************** 18.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=12) +*************************** 19.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 20.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=2 width=12 order=[asc|1 desc|0]) +*************************** 21.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 22.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] +*************************** 23.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=12) +*************************** 24.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 25.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=2 width=12 order=[asc|1 desc|0]) +*************************** 26.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 27.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] + +taos> explain verbose true select _wstart, _wend, last(ts) from (select _wstart as ts, _wend, count(*), last(ts) from meters state_window(c2) order by _wend asc) interval(1h) order by _wstart desc\G; +*************************** 1.row *************************** +QUERY_PLAN: -> Interval on Column ts (functions=3 width=24 input_order=asc output_order=desc ) +*************************** 2.row *************************** +QUERY_PLAN: Output: columns=3 width=24 +*************************** 3.row *************************** +QUERY_PLAN: Time Window: interval=1h offset=0a sliding=1h +*************************** 4.row 
*************************** +QUERY_PLAN: Merge ResBlocks: True +*************************** 5.row *************************** +QUERY_PLAN: -> Projection (columns=3 width=24 input_order=asc ) +*************************** 6.row *************************** +QUERY_PLAN: Output: columns=3 width=24 +*************************** 7.row *************************** +QUERY_PLAN: Output: Ignore Group Id: true +*************************** 8.row *************************** +QUERY_PLAN: Merge ResBlocks: True +*************************** 9.row *************************** +QUERY_PLAN: -> StateWindow on Column c2 (functions=4 width=36) +*************************** 10.row *************************** +QUERY_PLAN: Output: columns=4 width=32 +*************************** 11.row *************************** +QUERY_PLAN: Output: columns=4 width=32 +*************************** 12.row *************************** +QUERY_PLAN: -> SortMerge (columns=2 width=12 input_order=unknown output_order=unknown) +*************************** 13.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 14.row *************************** +QUERY_PLAN: Output: Ignore Group Id: false +*************************** 15.row *************************** +QUERY_PLAN: Merge Key: _group_id asc, ts asc +*************************** 16.row *************************** +QUERY_PLAN: -> Data Exchange 1:1 (width=12) +*************************** 17.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 18.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=2 width=12 order=[asc|1 desc|0]) +*************************** 19.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 20.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] +*************************** 21.row *************************** +QUERY_PLAN: -> Data Exchange 
1:1 (width=12) +*************************** 22.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 23.row *************************** +QUERY_PLAN: -> Table Merge Scan on meters (columns=2 width=12 order=[asc|1 desc|0]) +*************************** 24.row *************************** +QUERY_PLAN: Output: columns=2 width=12 +*************************** 25.row *************************** +QUERY_PLAN: Time Range: [-9223372036854775808, 9223372036854775807] + diff --git a/tests/script/tsim/query/t/explain_tsorder.sql b/tests/script/tsim/query/t/explain_tsorder.sql index 056ac440fee299677b991d0a996ac47a2e854073..53bfb9a597b47a66d84b88bdcea35c071a6ec439 100644 --- a/tests/script/tsim/query/t/explain_tsorder.sql +++ b/tests/script/tsim/query/t/explain_tsorder.sql @@ -98,3 +98,65 @@ select last(ts), c2 as d from d1 group by c2 order by c2 asc limit 9,1; select last(ts) as ts, c2 as d from d1 group by c2 order by ts desc, c2 asc limit 10; select last(ts) as ts, c2 as d from d1 group by c2 order by ts desc, c2 asc limit 2,8; select last(ts) as ts, c2 as d from d1 group by c2 order by ts desc, c2 asc limit 9,1; + +explain verbose true select _wstart, _wend, count(*) from meters event_window start with c2 > 0 end with c2 < 100\G; +explain verbose true select _wstart, _wend, count(*) from meters event_window start with c2 > 0 end with c2 < 100 order by _wstart desc\G; +explain verbose true select _wstart, _wend, count(*) from meters event_window start with c2 > 0 end with c2 < 100 order by _wstart asc\G; + +explain verbose true select _wstart, _wend, count(*) from meters event_window start with c2 > 0 end with c2 < 100 order by _wend desc\G; +explain verbose true select _wstart, _wend, count(*) from meters event_window start with c2 > 0 end with c2 < 100 order by _wend asc\G; + +select _wstart, _wend, count(*) from meters event_window start with c2 > 0 end with c2 < 100; +select _wstart, _wend, count(*) from meters event_window 
start with c2 > 0 end with c2 < 100 order by _wstart desc; +select _wstart, _wend, count(*) from meters event_window start with c2 > 0 end with c2 < 100 order by _wstart asc; + +select _wstart, _wend, count(*) from meters event_window start with c2 > 0 end with c2 < 100 order by _wend desc; +select _wstart, _wend, count(*) from meters event_window start with c2 > 0 end with c2 < 100 order by _wend asc; + +explain verbose true select _wstart, _wend, count(*) from meters session(ts, 1h)\G; +explain verbose true select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wstart desc\G; +explain verbose true select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wstart asc\G; + +explain verbose true select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wend desc\G; +explain verbose true select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wend asc\G; + +select _wstart, _wend, count(*) from meters session(ts, 1h); +select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wstart desc; +select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wstart asc; + +select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wend desc; +select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wend asc; + + +explain verbose true select _wstart, _wend, count(*) from meters session(ts, 1h)\G; +explain verbose true select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wstart desc\G; +explain verbose true select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wstart asc\G; + +explain verbose true select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wend desc\G; +explain verbose true select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wend asc\G; + +select _wstart, _wend, count(*) from meters session(ts, 1h); +select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wstart desc; +select _wstart, _wend, 
count(*) from meters session(ts, 1h) order by _wstart asc; + +select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wend desc; +select _wstart, _wend, count(*) from meters session(ts, 1h) order by _wend asc; + +explain verbose true select _wstart, _wend, count(*), last(ts) from meters state_window(c2)\G; +explain verbose true select _wstart, _wend, count(*), last(ts) from meters state_window(c2) order by _wstart desc\G; +explain verbose true select _wstart, _wend, count(*), last(ts) from meters state_window(c2) order by _wstart asc\G; + +explain verbose true select _wstart, _wend, count(*), last(ts) from meters state_window(c2) order by _wend desc\G; +explain verbose true select _wstart, _wend, count(*), last(ts) from meters state_window(c2) order by _wend asc\G; + +select _wstart, _wend, count(*), last(ts) from meters state_window(c2); +select _wstart, _wend, count(*), last(ts) from meters state_window(c2) order by _wstart desc; +select _wstart, _wend, count(*), last(ts) from meters state_window(c2) order by _wstart asc; + +select _wstart, _wend, count(*), last(ts) from meters state_window(c2) order by _wend desc; +select _wstart, _wend, count(*), last(ts) from meters state_window(c2) order by _wend asc; + +explain verbose true select _wstart, _wend, count(*), last(ts) from meters state_window(c2) order by _wend asc, count(*) desc\G; + +explain verbose true select _wstart, _wend, last(ts) from (select _wstart as ts, _wend, count(*), last(ts) from meters state_window(c2) order by _wend desc) interval(1h) order by _wstart desc\G; +explain verbose true select _wstart, _wend, last(ts) from (select _wstart as ts, _wend, count(*), last(ts) from meters state_window(c2) order by _wend asc) interval(1h) order by _wstart desc\G; diff --git a/tests/script/tsim/stream/basic3.sim b/tests/script/tsim/stream/basic3.sim index 2df33541b461b1f2fe1c772cb80550e421fe56aa..f18061a6df012b5093beef6c8f1588a61b69f5b8 100644 --- a/tests/script/tsim/stream/basic3.sim +++ 
b/tests/script/tsim/stream/basic3.sim @@ -1,11 +1,9 @@ system sh/stop_dnodes.sh system sh/deploy.sh -n dnode1 -i 1 -system sh/cfg.sh -n dnode1 -c debugflag -v 131 system sh/cfg.sh -n dnode1 -c keepColumnName -v 1 system sh/exec.sh -n dnode1 -s start -sleep 5000 - +sleep 1000 sql connect print ========== interval\session\state window @@ -32,7 +30,6 @@ sql create stream streamd6 into streamt6 as select ca, _wstart,_wend, count(*), sql alter local 'keepColumnName' '1' - sql CREATE STABLE `meters_test_data` (`ts` TIMESTAMP, `close` FLOAT, `parttime` TIMESTAMP, `parttime_str` VARCHAR(32)) TAGS (`id` VARCHAR(32)); sql_error create stream realtime_meters fill_history 1 into realtime_meters as select last(parttime),first(close),last(close) from meters_test_data partition by tbname state_window(parttime_str); @@ -58,17 +55,13 @@ sql_error create stream streamd11 into streamd11 as select _wstart, _wend, count sql alter local 'keepColumnName' '0' sql create stream realtime_meters fill_history 1 into realtime_meters as select last(parttime),first(close),last(close) from meters_test_data partition by tbname state_window(parttime_str); - sql desc realtime_meters; - if $rows == 0 then return -1 endi -sql create stream streamd7 into streamt7 as select _wstart, _wend, count(*), first(ca), last(ca) from t1 interval(10s); - +sql create stream streamd7 into streamt7 as select _wstart t1, _wend t2, count(*), first(ca), last(ca) from t1 interval(10s); sql desc streamt7; - if $rows == 0 then return -1 endi @@ -76,12 +69,11 @@ endi sql create stream streamd71 into streamt71 as select _wstart, _wend, count(*) as ca, first(ca), last(ca) as c2 from t1 interval(10s); sql desc streamt71; - if $rows == 0 then return -1 endi -sleep 3000 +sleep 1000 sql drop stream if exists streamd1; sql drop stream if exists streamd2; @@ -93,23 +85,19 @@ sql drop stream if exists streamd6; sql create stream streamd10 into streamd10 as select _wstart, _wend, count(*), first(ca), last(cb) as c2 from t1 
interval(10s); sql desc streamd10; - if $rows == 0 then return -1 endi sql_error create stream streamd11 into streamd11 as select _wstart, _wend, count(*), last(ca), last(ca) from t1 interval(10s); - sql create stream streamd12 into streamd12 as select _wstart, _wend, count(*), last(ca), last(cb) as c2 from t1 interval(10s); - sql desc streamd12; if $rows == 0 then return -1 endi - _OVER: system sh/exec.sh -n dnode1 -s stop -x SIGINT print =============== check diff --git a/tests/script/tsim/tmq/basic1.sim b/tests/script/tsim/tmq/basic1.sim index 4551228f2f67e6da9beed93f984e93113a77055f..fe6ec04a205313f084b120bf7d7bdf0d6096d916 100644 --- a/tests/script/tsim/tmq/basic1.sim +++ b/tests/script/tsim/tmq/basic1.sim @@ -35,7 +35,6 @@ sql connect sql use $dbName print == alter database -sql alter database $dbName wal_retention_period 3600 print == create topics from super table sql create topic topic_stb_column as select ts, c3 from stb @@ -87,7 +86,6 @@ sleep 500 sql use $cdbName print == alter database -sql alter database $cdbName wal_retention_period 3600 print == create consume info table and consume result table sql create table consumeinfo (ts timestamp, consumerid int, topiclist binary(1024), keylist binary(1024), expectmsgcnt bigint, ifcheckdata int, ifmanualcommit int) @@ -162,7 +160,6 @@ sleep 500 sql use $cdbName print == alter database -sql alter database $cdbName wal_retention_period 3600 print == create consume info table and consume result table sql create table consumeinfo (ts timestamp, consumerid int, topiclist binary(1024), keylist binary(1024), expectmsgcnt bigint, ifcheckdata int, ifmanualcommit int) @@ -236,7 +233,6 @@ sleep 500 sql use $cdbName print == alter database -sql alter database $cdbName wal_retention_period 3600 print == create consume info table and consume result table sql create table consumeinfo (ts timestamp, consumerid int, topiclist binary(1024), keylist binary(1024), expectmsgcnt bigint, ifcheckdata int, ifmanualcommit int) diff 
--git a/tests/script/tsim/tmq/basic1Of2Cons.sim b/tests/script/tsim/tmq/basic1Of2Cons.sim index 51d39e8d110f0be00e3a16cf7cdd47e6be226a78..c12351cbe81538ad87fdde313bd0e4f4c074875b 100644 --- a/tests/script/tsim/tmq/basic1Of2Cons.sim +++ b/tests/script/tsim/tmq/basic1Of2Cons.sim @@ -35,7 +35,6 @@ sql connect sql use $dbName print == alter database -sql alter database $dbName wal_retention_period 3600 print == create topics from super table sql create topic topic_stb_column as select ts, c3 from stb @@ -87,7 +86,6 @@ sleep 500 sql use $cdbName print == alter database -sql alter database $cdbName wal_retention_period 3600 print == create consume info table and consume result table for stb sql create table consumeinfo (ts timestamp, consumerid int, topiclist binary(1024), keylist binary(1024), expectmsgcnt bigint, ifcheckdata int, ifmanualcommit int) @@ -193,7 +191,6 @@ sleep 500 sql use $cdbName print == alter database -sql alter database $cdbName wal_retention_period 3600 print == create consume info table and consume result table for ctb sql create table consumeinfo (ts timestamp, consumerid int, topiclist binary(1024), keylist binary(1024), expectmsgcnt bigint, ifcheckdata int, ifmanualcommit int) @@ -298,7 +295,6 @@ sleep 500 sql use $cdbName print == alter database -sql alter database $cdbName wal_retention_period 3600 print == create consume info table and consume result table for ntb sql create table consumeinfo (ts timestamp, consumerid int, topiclist binary(1024), keylist binary(1024), expectmsgcnt bigint, ifcheckdata int, ifmanualcommit int) diff --git a/tests/script/tsim/tmq/basic2.sim b/tests/script/tsim/tmq/basic2.sim index 8356a60b672c54feb37663d5c3b8f3391c99a456..5c7528ea5dc749da644b83f0928f7b3cc35f8c7a 100644 --- a/tests/script/tsim/tmq/basic2.sim +++ b/tests/script/tsim/tmq/basic2.sim @@ -35,7 +35,6 @@ sql connect sql use $dbName print == alter database -sql alter database $dbName wal_retention_period 3600 print == create topics from super table sql 
create topic topic_stb_column as select ts, c3 from stb @@ -122,7 +121,6 @@ sleep 500 sql use $cdbName print == alter database -sql alter database $cdbName wal_retention_period 3600 print == create consume info table and consume result table sql create table consumeinfo (ts timestamp, consumerid int, topiclist binary(1024), keylist binary(1024), expectmsgcnt bigint, ifcheckdata int, ifmanualcommit int) @@ -182,7 +180,6 @@ sleep 500 sql use $cdbName print == alter database -sql alter database $cdbName wal_retention_period 3600 print == create consume info table and consume result table sql create table consumeinfo (ts timestamp, consumerid int, topiclist binary(1024), keylist binary(1024), expectmsgcnt bigint, ifcheckdata int, ifmanualcommit int) diff --git a/tests/script/tsim/tmq/basic2Of2Cons.sim b/tests/script/tsim/tmq/basic2Of2Cons.sim index 63e7e2dcf4e600e340f6c3767ab91dfd19fc5338..23598c17a4bce516d2b0c888cb55c6405766f2d8 100644 --- a/tests/script/tsim/tmq/basic2Of2Cons.sim +++ b/tests/script/tsim/tmq/basic2Of2Cons.sim @@ -35,7 +35,6 @@ sql connect sql use $dbName print == alter database -sql alter database $dbName wal_retention_period 3600 print == create topics from super table sql create topic topic_stb_column as select ts, c3 from stb @@ -151,7 +150,6 @@ sleep 500 sql use $cdbName print == alter database -sql alter database $cdbName wal_retention_period 3600 print == create consume info table and consume result table for ctb sql create table consumeinfo (ts timestamp, consumerid int, topiclist binary(1024), keylist binary(1024), expectmsgcnt bigint, ifcheckdata int, ifmanualcommit int) @@ -241,7 +239,6 @@ sleep 500 sql use $cdbName print == alter database -sql alter database $cdbName wal_retention_period 3600 print == create consume info table and consume result table for ntb sql create table consumeinfo (ts timestamp, consumerid int, topiclist binary(1024), keylist binary(1024), expectmsgcnt bigint, ifcheckdata int, ifmanualcommit int) diff --git 
a/tests/script/tsim/tmq/basic2Of2ConsOverlap.sim b/tests/script/tsim/tmq/basic2Of2ConsOverlap.sim index cfdae059dc862d4177d12891ffd9499900ba3b41..1223a94fa7e666540d4e790440c2a9039ab2feab 100644 --- a/tests/script/tsim/tmq/basic2Of2ConsOverlap.sim +++ b/tests/script/tsim/tmq/basic2Of2ConsOverlap.sim @@ -35,7 +35,6 @@ sql connect sql use $dbName print == alter database -sql alter database $dbName wal_retention_period 3600 print == create topics from super table sql create topic topic_stb_column as select ts, c3 from stb @@ -172,7 +171,6 @@ sleep 500 sql use $cdbName print == alter database -sql alter database $cdbName wal_retention_period 3600 print == create consume info table and consume result table for ctb sql create table consumeinfo (ts timestamp, consumerid int, topiclist binary(1024), keylist binary(1024), expectmsgcnt bigint, ifcheckdata int, ifmanualcommit int) @@ -266,7 +264,6 @@ sleep 500 sql use $cdbName print == alter database -sql alter database $cdbName wal_retention_period 3600 print == create consume info table and consume result table for ntb sql create table consumeinfo (ts timestamp, consumerid int, topiclist binary(1024), keylist binary(1024), expectmsgcnt bigint, ifcheckdata int, ifmanualcommit int) diff --git a/tests/script/tsim/tmq/basic3.sim b/tests/script/tsim/tmq/basic3.sim index a64dd6924d7c30dec12e315924439b37019af500..8bb34cefa2382fb89856cf477b9a1bd82daf800e 100644 --- a/tests/script/tsim/tmq/basic3.sim +++ b/tests/script/tsim/tmq/basic3.sim @@ -35,7 +35,6 @@ sql connect sql use $dbName print == alter database -sql alter database $dbName wal_retention_period 3600 print == create topics from super table sql create topic topic_stb_column as select ts, c3 from stb @@ -87,7 +86,6 @@ sleep 500 sql use $cdbName print == alter database -sql alter database $cdbName wal_retention_period 3600 print == create consume info table and consume result table sql create table consumeinfo (ts timestamp, consumerid int, topiclist binary(1024), keylist 
binary(1024), expectmsgcnt bigint, ifcheckdata int, ifmanualcommit int) @@ -161,7 +159,6 @@ sleep 500 sql use $cdbName print == alter database -sql alter database $cdbName wal_retention_period 3600 print == create consume info table and consume result table sql create table consumeinfo (ts timestamp, consumerid int, topiclist binary(1024), keylist binary(1024), expectmsgcnt bigint, ifcheckdata int, ifmanualcommit int) @@ -235,7 +232,6 @@ sleep 500 sql use $cdbName print == alter database -sql alter database $cdbName wal_retention_period 3600 print == create consume info table and consume result table sql create table consumeinfo (ts timestamp, consumerid int, topiclist binary(1024), keylist binary(1024), expectmsgcnt bigint, ifcheckdata int, ifmanualcommit int) diff --git a/tests/script/tsim/tmq/basic3Of2Cons.sim b/tests/script/tsim/tmq/basic3Of2Cons.sim index 4e47e3dbf9a0eeaf4380e43ceb6caef81f3c8770..75d762c44b6b572f2c9dfd5633b77eb5ff53ec39 100644 --- a/tests/script/tsim/tmq/basic3Of2Cons.sim +++ b/tests/script/tsim/tmq/basic3Of2Cons.sim @@ -35,7 +35,6 @@ sql connect sql use $dbName print == alter database -sql alter database $dbName wal_retention_period 3600 print == create topics from super table sql create topic topic_stb_column as select ts, c3 from stb @@ -86,7 +85,6 @@ sleep 500 sql use $cdbName print == alter database -sql alter database $cdbName wal_retention_period 3600 print == create consume info table and consume result table sql create table consumeinfo (ts timestamp, consumerid int, topiclist binary(1024), keylist binary(1024), expectmsgcnt bigint, ifcheckdata int, ifmanualcommit int) @@ -204,7 +202,6 @@ sleep 500 sql use $cdbName print == alter database -sql alter database $cdbName wal_retention_period 3600 print == create consume info table and consume result table sql create table consumeinfo (ts timestamp, consumerid int, topiclist binary(1024), keylist binary(1024), expectmsgcnt bigint, ifcheckdata int, ifmanualcommit int) @@ -309,7 +306,6 @@ 
sleep 500 sql use $cdbName print == alter database -sql alter database $cdbName wal_retention_period 3600 print == create consume info table and consume result table sql create table consumeinfo (ts timestamp, consumerid int, topiclist binary(1024), keylist binary(1024), expectmsgcnt bigint, ifcheckdata int, ifmanualcommit int) diff --git a/tests/script/tsim/tmq/basic4.sim b/tests/script/tsim/tmq/basic4.sim index 6b35342ad119431d1eece232486069c9792015d9..c72d8ff412cc8b8687f4388ed471951eb7c09a47 100644 --- a/tests/script/tsim/tmq/basic4.sim +++ b/tests/script/tsim/tmq/basic4.sim @@ -35,7 +35,6 @@ sql connect sql use $dbName print == alter database -sql alter database $dbName wal_retention_period 3600 print == create topics from super table sql create topic topic_stb_column as select ts, c3 from stb @@ -119,7 +118,6 @@ sleep 500 sql use $cdbName print == alter database -sql alter database $cdbName wal_retention_period 3600 print == create consume info table and consume result table sql create table consumeinfo (ts timestamp, consumerid int, topiclist binary(1024), keylist binary(1024), expectmsgcnt bigint, ifcheckdata int, ifmanualcommit int) @@ -179,7 +177,6 @@ sleep 500 sql use $cdbName print == alter database -sql alter database $cdbName wal_retention_period 3600 print == create consume info table and consume result table sql create table consumeinfo (ts timestamp, consumerid int, topiclist binary(1024), keylist binary(1024), expectmsgcnt bigint, ifcheckdata int, ifmanualcommit int) diff --git a/tests/script/tsim/tmq/basic4Of2Cons.sim b/tests/script/tsim/tmq/basic4Of2Cons.sim index 122a91af3627fb12d00be61a503942e32dca604d..bb006a354c0f0af476d0365de4cd28f0dea2a7de 100644 --- a/tests/script/tsim/tmq/basic4Of2Cons.sim +++ b/tests/script/tsim/tmq/basic4Of2Cons.sim @@ -35,7 +35,6 @@ sql connect sql use $dbName print == alter database -sql alter database $dbName wal_retention_period 3600 print == create topics from super table sql create topic topic_stb_column as select 
ts, c3 from stb @@ -160,7 +159,6 @@ sleep 500 sql use $cdbName print == alter database -sql alter database $cdbName wal_retention_period 3600 print == create consume info table and consume result table sql create table consumeinfo (ts timestamp, consumerid int, topiclist binary(1024), keylist binary(1024), expectmsgcnt bigint, ifcheckdata int, ifmanualcommit int) @@ -251,7 +249,6 @@ sleep 500 sql use $cdbName print == alter database -sql alter database $cdbName wal_retention_period 3600 print == create consume info table and consume result table sql create table consumeinfo (ts timestamp, consumerid int, topiclist binary(1024), keylist binary(1024), expectmsgcnt bigint, ifcheckdata int, ifmanualcommit int) diff --git a/tests/script/tsim/tmq/snapshot.sim b/tests/script/tsim/tmq/snapshot.sim index 81fff3522499ff99ccfa9a92be5483ea42d775ba..fbdaba7d28d266526bd02aaa86a5de3c85f41338 100644 --- a/tests/script/tsim/tmq/snapshot.sim +++ b/tests/script/tsim/tmq/snapshot.sim @@ -35,7 +35,6 @@ sql connect sql use $dbName print == alter database -sql alter database $dbName wal_retention_period 3600 print == create topics from super table sql create topic topic_stb_column as select ts, c3 from stb @@ -87,7 +86,6 @@ sleep 500 sql use $cdbName print == alter database -sql alter database $cdbName wal_retention_period 3600 print == create consume info table and consume result table sql create table consumeinfo (ts timestamp, consumerid int, topiclist binary(1024), keylist binary(1024), expectmsgcnt bigint, ifcheckdata int, ifmanualcommit int) @@ -159,7 +157,6 @@ sleep 500 sql use $cdbName print == alter database -sql alter database $cdbName wal_retention_period 3600 print == create consume info table and consume result table sql create table consumeinfo (ts timestamp, consumerid int, topiclist binary(1024), keylist binary(1024), expectmsgcnt bigint, ifcheckdata int, ifmanualcommit int) @@ -233,7 +230,6 @@ sleep 500 sql use $cdbName print == alter database -sql alter database 
$cdbName wal_retention_period 3600 print == create consume info table and consume result table sql create table consumeinfo (ts timestamp, consumerid int, topiclist binary(1024), keylist binary(1024), expectmsgcnt bigint, ifcheckdata int, ifmanualcommit int) diff --git a/tests/script/tsim/tmq/snapshot1.sim b/tests/script/tsim/tmq/snapshot1.sim index c79892ae1d943a071c58fe2236eb90ef9a5e10d4..5349981cc719e41d6d91a9a0a6e18555a7b8fd86 100644 --- a/tests/script/tsim/tmq/snapshot1.sim +++ b/tests/script/tsim/tmq/snapshot1.sim @@ -35,7 +35,6 @@ sql connect sql use $dbName print == alter database -sql alter database $dbName wal_retention_period 3600 print == create topics from super table sql create topic topic_stb_column as select ts, c3 from stb @@ -151,7 +150,6 @@ sleep 500 sql use $cdbName print == alter database -sql alter database $cdbName wal_retention_period 3600 print == create consume info table and consume result table for ctb sql create table consumeinfo (ts timestamp, consumerid int, topiclist binary(1024), keylist binary(1024), expectmsgcnt bigint, ifcheckdata int, ifmanualcommit int) @@ -231,7 +229,6 @@ sleep 500 sql use $cdbName print == alter database -sql alter database $cdbName wal_retention_period 3600 print == create consume info table and consume result table for ntb sql create table consumeinfo (ts timestamp, consumerid int, topiclist binary(1024), keylist binary(1024), expectmsgcnt bigint, ifcheckdata int, ifmanualcommit int) diff --git a/tests/script/tsim/tmq/topic.sim b/tests/script/tsim/tmq/topic.sim index 78c4c561af9ae55d203635cdccc562eb8bec869d..8d4b506b2492b4abbb178f3f8b99f25c1e88e9d9 100644 --- a/tests/script/tsim/tmq/topic.sim +++ b/tests/script/tsim/tmq/topic.sim @@ -40,7 +40,6 @@ endi sql use $dbName print == alter database -sql alter database $dbName wal_retention_period 3600 print == create super table sql create table $stbPrefix (ts timestamp, c1 int, c2 float, c3 binary(16)) tags (t1 int) diff --git 
a/tests/system-test/0-others/compatibility.py b/tests/system-test/0-others/compatibility.py index 9dbfd7f0ea46562cc589eacfef90552887b1eafa..98a0fbe18d2ebadb253cd003f563811476141a7d 100644 --- a/tests/system-test/0-others/compatibility.py +++ b/tests/system-test/0-others/compatibility.py @@ -193,7 +193,6 @@ class TDTestCase: tdsql.execute("drop database if exists db") tdsql.execute("create database db") tdsql.execute("use db") - tdsql.execute("alter database db wal_retention_period 3600") tdsql.execute("create stable db.stb1 (ts timestamp, c1 int) tags (t1 int);") tdsql.execute("insert into db.ct1 using db.stb1 TAGS(1) values(now(),11);") tdsql.error(" insert into `db.ct2` using db.stb1 TAGS(9) values(now(),11);") @@ -204,7 +203,6 @@ class TDTestCase: tdsql.execute("insert into db.`ct4` using db.stb1 TAGS(4) values(now(),14);") tdsql.query("select * from db.ct4") tdsql.checkData(0,1,14) - print(1) tdsql=tdCom.newTdSql() tdsql.query("describe information_schema.ins_databases;") qRows=tdsql.queryRows diff --git a/tests/system-test/0-others/performance_schema.py b/tests/system-test/0-others/performance_schema.py index 9d2a362254714a2daefc98787a3c1ec236c3e80c..b4a16794bb0f36b907c21ff4dd657b9b920321a1 100755 --- a/tests/system-test/0-others/performance_schema.py +++ b/tests/system-test/0-others/performance_schema.py @@ -75,7 +75,6 @@ class TDTestCase: def prepare_data(self): tdSql.execute(f"create database if not exists {self.dbname} vgroups 2") #1 query tdSql.execute(f'use {self.dbname}') #1 query - tdsql.execute(f"alter database {self.dbname} wal_retention_period 3600") tdSql.execute(self.setsql.set_create_stable_sql(self.stbname,self.column_dict,self.tag_dict)) #1 query for i in range(self.tbnum): #self.tbnum query diff --git a/tests/system-test/0-others/show.py b/tests/system-test/0-others/show.py index 50a1662ba013d4e4dd52ffbf4bdfb9f47f7c92d7..9d26b3a2aeb679195246e0989605b48f12b91a77 100644 --- a/tests/system-test/0-others/show.py +++ 
b/tests/system-test/0-others/show.py @@ -95,6 +95,23 @@ class TDTestCase: tdSql.checkEqual(f'{db}',tdSql.queryResult[0][0]) tdSql.checkEqual(f'CREATE DATABASE `{db}`',tdSql.queryResult[0][1]) + def show_create_systb_sql(self): + for param in self.ins_param_list: + tdSql.query(f'show create table information_schema.ins_{param}') + tdSql.checkEqual(f'ins_{param}',tdSql.queryResult[0][0]) + + tdSql.execute(f'use information_schema') + tdSql.query(f'show create table ins_{param}') + tdSql.checkEqual(f'ins_{param}',tdSql.queryResult[0][0]) + + for param in self.perf_param_list: + tdSql.query(f'show create table performance_schema.perf_{param}') + tdSql.checkEqual(f'perf_{param}',tdSql.queryResult[0][0]) + + tdSql.execute(f'use performance_schema') + tdSql.query(f'show create table perf_{param}') + tdSql.checkEqual(f'perf_{param}',tdSql.queryResult[0][0]) + def show_create_sql(self): create_db_sql = self.set_create_database_sql(self.db_param) print(create_db_sql) @@ -200,6 +217,7 @@ class TDTestCase: self.perf_check() self.show_create_sql() self.show_create_sysdb_sql() + self.show_create_systb_sql() def stop(self): tdSql.close() diff --git a/tests/system-test/0-others/sma_index.py b/tests/system-test/0-others/sma_index.py new file mode 100644 index 0000000000000000000000000000000000000000..488342b6039a7beac13d7ecf9c5401cc8a69a145 --- /dev/null +++ b/tests/system-test/0-others/sma_index.py @@ -0,0 +1,48 @@ +import taos +import sys +import time +import socket +import os +import threading + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import * + +class TDTestCase: + hostname = socket.gethostname() + + def init(self, conn, logSql, replicaVar=1): + self.replicaVar = int(replicaVar) + tdLog.debug(f"start to excute {__file__}") + #tdSql.init(conn.cursor()) + tdSql.init(conn.cursor(), logSql) # output sql.txt file + + def create_databases(self): + tdSql.execute("create database db_ms precision 'ms'") + tdSql.execute("create database 
db_us precision 'us'") + tdSql.execute("create database db_ns precision 'ns'") + + def create_stables(self): + tdSql.execute("CREATE STABLE db_ms.`meters` (`ts` TIMESTAMP, `c0` INT, `c1` TINYINT, `c2` DOUBLE, `c3` VARCHAR(64), `c4` NCHAR(64)) TAGS (`cc` VARCHAR(16))") + tdSql.execute("CREATE STABLE db_us.`meters` (`ts` TIMESTAMP, `c0` INT, `c1` TINYINT, `c2` DOUBLE, `c3` VARCHAR(64), `c4` NCHAR(64)) TAGS (`cc` VARCHAR(16))") + tdSql.execute("CREATE STABLE db_ns.`meters` (`ts` TIMESTAMP, `c0` INT, `c1` TINYINT, `c2` DOUBLE, `c3` VARCHAR(64), `c4` NCHAR(64)) TAGS (`cc` VARCHAR(16))") + + def create_sma_index(self): + tdSql.execute("create sma index sma_index_ms on db_ms.meters function(max(c1), max(c2), min(c1)) interval(6m, 10s) sliding(6m)" ) + tdSql.execute("create sma index sma_index_us on db_us.meters function(max(c1), max(c2), min(c1)) interval(6m, 10s) sliding(6m)" ) + tdSql.execute("create sma index sma_index_ns on db_ns.meters function(max(c1), max(c2), min(c1)) interval(6m, 10s) sliding(6m)" ) + + def run(self): + tdSql.prepare() + self.create_databases() + self.create_stables() + self.create_sma_index() + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/system-test/0-others/splitVGroup.py b/tests/system-test/0-others/splitVGroup.py index 450996106608aa60cec781392bec8f9463ff2171..9fd00892e45ade46da43e4fb4a38d532109e1fb4 100644 --- a/tests/system-test/0-others/splitVGroup.py +++ b/tests/system-test/0-others/splitVGroup.py @@ -283,14 +283,14 @@ class TDTestCase: # normal table - # all rows - sql = "select * from @db_name.ta" - self.queryDouble(sql) - # count sql = "select count(*) from @db_name.ta" self.queryDouble(sql) + # all rows + sql = "select * from @db_name.ta" + self.queryDouble(sql) + # sum sql = "select sum(c1) from @db_name.ta" self.queryDouble(sql) @@ -316,7 +316,8 @@ class TDTestCase: tdSql.execute(sql) 
# wait end - for i in range(100): + seconds = 300 + for i in range(seconds): sql ="show transactions;" rows = tdSql.query(sql) if rows == 0: @@ -325,7 +326,7 @@ class TDTestCase: #tdLog.info(f"i={i} wait split vgroup ...") time.sleep(1) - tdLog.exit("split vgroup transaction is not finished after executing 50s") + tdLog.exit(f"split vgroup transaction is not finished after executing {seconds}s") return False # split error @@ -382,6 +383,14 @@ class TDTestCase: self.expectSplitError("topicdb") tdSql.execute("drop topic toa;") self.expectSplitOk("topicdb") + + # compact and check db2 + def compactAndCheck(self): + tdLog.info("compact db2 and check result ...") + # compact + tdSql.execute(f"compact database {self.db2};") + # check result + self.checkResult() # run def run(self): @@ -390,12 +399,15 @@ class TDTestCase: for i in range(5): # split vgroup on db2 + start = time.time() self.splitVGroup(self.db2) + end = time.time() self.vgroups2 += 1 - + # check two db query result same self.checkResult() - tdLog.info(f"split vgroup i={i} passed.") + spend = "%.3f"%(end-start) + tdLog.info(f"split vgroup i={i} passed. 
spend = {spend}s") # split empty db self.splitEmptyDB() @@ -403,6 +415,9 @@ class TDTestCase: # check topic and stream forib self.checkForbid() + # compact database + self.compactAndCheck() + # stop def stop(self): tdSql.close() diff --git a/tests/system-test/0-others/taosdMonitor.py b/tests/system-test/0-others/taosdMonitor.py index 8094c4e0f555dcd7a5a7340c439a0a34948b212d..a07d7f411e2528033711972addf300fbcd909de4 100644 --- a/tests/system-test/0-others/taosdMonitor.py +++ b/tests/system-test/0-others/taosdMonitor.py @@ -185,28 +185,14 @@ class RequestHandlerImpl(http.server.BaseHTTPRequestHandler): if "total" not in infoDict["disk_infos"]["tempdir"] or infoDict["disk_infos"]["tempdir"]["total"] <= 0: tdLog.exit("total is null!") - # log_infos ==================================== if "log_infos" not in infoDict or infoDict["log_infos"]== None: tdLog.exit("log_infos is null!") - if "logs" not in infoDict["log_infos"] or len(infoDict["log_infos"]["logs"]) < 8:#!= 10: - tdLog.exit("logs is null!") - - if "ts" not in infoDict["log_infos"]["logs"][0] or len(infoDict["log_infos"]["logs"][0]["ts"]) <= 10: - tdLog.exit("ts is null!") - - if "level" not in infoDict["log_infos"]["logs"][0] or infoDict["log_infos"]["logs"][0]["level"] not in ["error" ,"info" , "debug" ,"trace"]: - tdLog.exit("level is null!") - - if "content" not in infoDict["log_infos"]["logs"][0] or len(infoDict["log_infos"]["logs"][0]["ts"]) <= 1: - tdLog.exit("content is null!") - if "summary" not in infoDict["log_infos"] or len(infoDict["log_infos"]["summary"])!= 4: tdLog.exit("summary is null!") - if "total" not in infoDict["log_infos"]["summary"][0] or infoDict["log_infos"]["summary"][0]["total"] < 0 : tdLog.exit("total is null!") @@ -315,4 +301,3 @@ class TDTestCase: tdCases.addLinux(__file__, TDTestCase()) tdCases.addWindows(__file__, TDTestCase()) - diff --git a/tests/system-test/0-others/timeRangeWise.py b/tests/system-test/0-others/timeRangeWise.py new file mode 100644 index 
0000000000000000000000000000000000000000..a7dc18aa820caec9166e2f636c11c5b81e4e201c --- /dev/null +++ b/tests/system-test/0-others/timeRangeWise.py @@ -0,0 +1,309 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. +# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import sys +import random +import time +import copy +import string + +import taos +from util.log import * +from util.cases import * +from util.sql import * + +class TDTestCase: + + # random string + def random_string(self, count): + letters = string.ascii_letters + return ''.join(random.choice(letters) for i in range(count)) + + # get col value and total max min ... 
+ def getColsValue(self, i, j): + # c1 value + if random.randint(1, 10) == 5: + c1 = None + else: + c1 = 1 + + # c2 value + if j % 3200 == 0: + c2 = 8764231 + elif random.randint(1, 10) == 5: + c2 = None + else: + c2 = random.randint(-87654297, 98765321) + + + value = f"({self.ts}, " + + # c1 + if c1 is None: + value += "null," + else: + self.c1Cnt += 1 + value += f"{c1}," + # c2 + if c2 is None: + value += "null," + else: + value += f"{c2}," + # total count + self.c2Cnt += 1 + # max + if self.c2Max is None: + self.c2Max = c2 + else: + if c2 > self.c2Max: + self.c2Max = c2 + # min + if self.c2Min is None: + self.c2Min = c2 + else: + if c2 < self.c2Min: + self.c2Min = c2 + # sum + if self.c2Sum is None: + self.c2Sum = c2 + else: + self.c2Sum += c2 + + # c3 same with ts + value += f"{self.ts})" + + # move next 1s interval + self.ts += 1 + + return value + + # insert data + def insertData(self): + tdLog.info("insert data ....") + sqls = "" + for i in range(self.childCnt): + # insert child table + values = "" + pre_insert = f"insert into @db_name.t{i} values " + for j in range(self.childRow): + if values == "": + values = self.getColsValue(i, j) + else: + values += "," + self.getColsValue(i, j) + + # batch insert + if j % self.batchSize == 0 and values != "": + sql = pre_insert + values + self.exeDouble(sql) + values = "" + # append last + if values != "": + sql = pre_insert + values + self.exeDouble(sql) + values = "" + + # insert finished + tdLog.info(f"insert data successfully.\n" + f" inserted child table = {self.childCnt}\n" + f" inserted child rows = {self.childRow}\n" + f" total inserted rows = {self.childCnt*self.childRow}\n") + return + + def exeDouble(self, sql): + # dbname replace + sql1 = sql.replace("@db_name", self.db1) + + if len(sql1) > 100: + tdLog.info(sql1[:100]) + else: + tdLog.info(sql1) + tdSql.execute(sql1) + + sql2 = sql.replace("@db_name", self.db2) + if len(sql2) > 100: + tdLog.info(sql2[:100]) + else: + tdLog.info(sql2) + tdSql.execute(sql2) 
+ + + # prepareEnv + def prepareEnv(self): + # init + self.ts = 1680000000000 + self.childCnt = 2 + self.childRow = 100000 + self.batchSize = 5000 + self.vgroups1 = 4 + self.vgroups2 = 4 + self.db1 = "db1" # no sma + self.db2 = "db2" # have sma + self.smaClause = "interval(10s)" + + # total + self.c1Cnt = 0 + self.c2Cnt = 0 + self.c2Max = None + self.c2Min = None + self.c2Sum = None + + # alter local optimization to treu + sql = "alter local 'querysmaoptimize 1'" + tdSql.execute(sql, 5, True) + + # check forbid mulit-replic on create sma index + sql = f"create database db vgroups {self.vgroups1} replica 3" + tdSql.execute(sql, 5, True) + sql = f"create table db.st(ts timestamp, c1 int, c2 bigint, ts1 timestamp) tags(area int)" + tdSql.execute(sql, 5, True) + + sql = f"create sma index sma_test on db.st function(max(c1),max(c2),min(c1),min(c2)) {self.smaClause};" + tdLog.info(sql) + tdSql.error(sql) + + + # create database db + sql = f"create database @db_name vgroups {self.vgroups1} replica 1" + self.exeDouble(sql) + + # create super talbe st + sql = f"create table @db_name.st(ts timestamp, c1 int, c2 bigint, ts1 timestamp) tags(area int)" + self.exeDouble(sql) + + # create child table + for i in range(self.childCnt): + sql = f"create table @db_name.t{i} using @db_name.st tags({i}) " + self.exeDouble(sql) + + # create sma index on db2 + sql = f"use {self.db2}" + tdSql.execute(sql) + sql = f"create sma index sma_index_maxmin on {self.db2}.st function(max(c1),max(c2),min(c1),min(c2)) {self.smaClause};" + tdLog.info(sql) + tdSql.execute(sql) + + # insert data + self.insertData() + + # check data correct + def checkExpect(self, sql, expectVal): + tdSql.query(sql) + rowCnt = tdSql.getRows() + for i in range(rowCnt): + val = tdSql.getData(i,0) + if val != expectVal: + tdLog.exit(f"Not expect . query={val} expect={expectVal} i={i} sql={sql}") + return False + + tdLog.info(f"check expect ok. 
sql={sql} expect ={expectVal} rowCnt={rowCnt}") + return True + + # init + def init(self, conn, logSql, replicaVar=1): + seed = time.clock_gettime(time.CLOCK_REALTIME) + random.seed(seed) + self.replicaVar = int(replicaVar) + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor(), True) + + # check query result same + def queryDoubleImpl(self, sql): + # sql + sql1 = sql.replace('@db_name', self.db1) + tdLog.info(sql1) + start1 = time.time() + rows1 = tdSql.query(sql1) + spend1 = time.time() - start1 + res1 = copy.copy(tdSql.queryResult) + + sql2 = sql.replace('@db_name', self.db2) + tdLog.info(sql2) + start2 = time.time() + tdSql.query(sql2) + spend2 = time.time() - start2 + res2 = tdSql.queryResult + + rowlen1 = len(res1) + rowlen2 = len(res2) + + if rowlen1 != rowlen2: + tdLog.info(f"check error. rowlen1={rowlen1} rowlen2={rowlen2} both not equal.") + return False + + for i in range(rowlen1): + row1 = res1[i] + row2 = res2[i] + collen1 = len(row1) + collen2 = len(row2) + if collen1 != collen2: + tdLog.info(f"checkerror. 
collen1={collen1} collen2={collen2} both not equal.") + return False + for j in range(collen1): + if row1[j] != row2[j]: + tdLog.exit(f"col={j} col1={row1[j]} col2={row2[j]} both col not equal.") + return False + + # warning performance + multiple = spend1/spend2 + tdLog.info("spend1=%.6fs spend2=%.6fs multiple=%.1f"%(spend1, spend2, multiple)) + if spend2 > spend1 and multiple < 4: + tdLog.info(f"performace not reached: multiple(spend1/spend)={multiple} require is >=4 ") + return False + + return True + + # check query result same + def queryDouble(self, sql, tryCount=60, gap=1): + for i in range(tryCount): + if self.queryDoubleImpl(sql): + return True + # error + tdLog.info(f"queryDouble return false, try loop={i}") + time.sleep(gap) + + tdLog.exit(f"queryDouble try {tryCount} times, but all failed.") + return False + + # check result + def checkResult(self): + + # max + sql = f"select max(c1) from @db_name.st {self.smaClause}" + self.queryDouble(sql) + + # min + sql = f"select max(c2) from @db_name.st {self.smaClause}" + self.queryDouble(sql) + + # mix + sql = f"select max(c1),max(c2),min(c1),min(c2) from @db_name.st {self.smaClause}" + self.queryDouble(sql) + + + # run + def run(self): + # prepare env + self.prepareEnv() + + # check two db query result same + tdLog.info(f"check have sma(db1) and no sma(db2) performace...") + self.checkResult() + + # stop + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/system-test/0-others/user_manage.py b/tests/system-test/0-others/user_manage.py index 1e33d4bb1c628bd43569b68d8f7fc07e68f2bec8..6f90a2873afa4e08c1ec4b72457b6cba9f232d48 100644 --- a/tests/system-test/0-others/user_manage.py +++ b/tests/system-test/0-others/user_manage.py @@ -115,7 +115,6 @@ class TDTestCase: jiacy0_read_conn = taos.connect(user='jiacy0_read', password='123') jiacy0_write_conn = 
taos.connect(user='jiacy0_write', password='123') jiacy0_none_conn = taos.connect(user='jiacy0_none', password='123') - tdSql.execute('alter database db wal_retention_period 3600') tdSql.execute('create topic root_db as select * from db.stb') for user in [jiacy1_all_conn, jiacy1_read_conn, jiacy0_all_conn, jiacy0_read_conn]: user.execute(f'create topic db_jiacy as select * from db.stb') diff --git a/tests/system-test/1-insert/boundary.py b/tests/system-test/1-insert/boundary.py index 29dcbc7c461b01d61c784cbaa30c2a724e156e2a..4476236ca65666942e67ecca3c2aaf0abaa48038 100644 --- a/tests/system-test/1-insert/boundary.py +++ b/tests/system-test/1-insert/boundary.py @@ -33,7 +33,7 @@ class TDTestCase: self.colname_length_boundary = self.boundary.COL_KEY_MAX_LENGTH self.tagname_length_boundary = self.boundary.TAG_KEY_MAX_LENGTH self.username_length_boundary = 23 - self.password_length_boundary = 128 + self.password_length_boundary = 31 def dbname_length_check(self): dbname_length = randint(1,self.dbname_length_boundary-1) for dbname in [tdCom.get_long_name(self.dbname_length_boundary),tdCom.get_long_name(dbname_length)]: diff --git a/tests/system-test/1-insert/delete_stable.py b/tests/system-test/1-insert/delete_stable.py index 8ebe7b6692e31bba12bdc0a3cbc885112eb96562..67561c51e506056d34eff5a0a7f1ee3eb7367fdb 100644 --- a/tests/system-test/1-insert/delete_stable.py +++ b/tests/system-test/1-insert/delete_stable.py @@ -24,10 +24,11 @@ from util.common import * from util.sqlset import TDSetSql class TDTestCase: + updatecfgDict = {'tsdbdebugFlag': 143} def init(self, conn, logSql, replicaVar=1): self.replicaVar = int(replicaVar) tdLog.debug("start to execute %s" % __file__) - tdSql.init(conn.cursor()) + tdSql.init(conn.cursor(), True) self.dbname = 'db_test' self.ns_dbname = 'ns_test' self.us_dbname = 'us_test' diff --git a/tests/system-test/1-insert/drop.py b/tests/system-test/1-insert/drop.py index 
9954b3557e23df193a4c4f84c819fc508c39f5bb..8775450ff0eee5ce1bd82d6eb83dc2f85ed44ba4 100644 --- a/tests/system-test/1-insert/drop.py +++ b/tests/system-test/1-insert/drop.py @@ -129,6 +129,12 @@ class TDTestCase: tdSql.query(f'select * from information_schema.ins_topics where topic_name = "{topic_name}"') tdSql.checkEqual(tdSql.queryResult[0][3],f'create topic {topic_name} as select c0 from {self.dbname}.{stbname}') tdSql.execute(f'drop topic {topic_name}') + + #TD-25222 + long_topic_name="hhhhjjhhhhqwertyuiasdfghjklzxcvbnmhhhhjjhhhhqwertyuiasdfghjklzxcvbnmhhhhjjhhhhqwertyuiasdfghjklzxcvbnm" + tdSql.execute(f'create topic {long_topic_name} as select * from {self.dbname}.{stbname}') + tdSql.execute(f'drop topic {long_topic_name}') + tdSql.execute(f'drop database {self.dbname}') def drop_stream_check(self): diff --git a/tests/system-test/1-insert/opentsdb_json_taosc_insert.py b/tests/system-test/1-insert/opentsdb_json_taosc_insert.py index 857a8e3a32cfab505629a3b8e41397a37b4b73bd..91ef1e70a3ac1fe7350eaf216fe33bdfa119025b 100644 --- a/tests/system-test/1-insert/opentsdb_json_taosc_insert.py +++ b/tests/system-test/1-insert/opentsdb_json_taosc_insert.py @@ -24,6 +24,8 @@ import threading import json class TDTestCase: + updatecfgDict = {'clientCfg': {'smlDot2Underline': 0}} + def init(self, conn, logSql, replicaVar=1): self.replicaVar = int(replicaVar) tdLog.debug("start to execute %s" % __file__) diff --git a/tests/system-test/1-insert/opentsdb_telnet_line_taosc_insert.py b/tests/system-test/1-insert/opentsdb_telnet_line_taosc_insert.py index 351cf49e3a217a44e93bbaf9c8c69ce2fa76c190..d419aee12cd34c20b6a3d1e5256b1d704df5ad2b 100644 --- a/tests/system-test/1-insert/opentsdb_telnet_line_taosc_insert.py +++ b/tests/system-test/1-insert/opentsdb_telnet_line_taosc_insert.py @@ -28,6 +28,8 @@ if platform.system().lower() == 'windows': sys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='utf8') class TDTestCase: + updatecfgDict = {'clientCfg': {'smlDot2Underline': 0}} + def 
init(self, conn, logSql, replicaVar=1): self.replicaVar = int(replicaVar) tdLog.debug("start to execute %s" % __file__) diff --git a/tests/system-test/2-query/columnLenUpdated.py b/tests/system-test/2-query/columnLenUpdated.py index e43b32a716017702382ce0384bb377d692e64684..93d9a492f946db423b78f7891b533af8c9a6da9f 100644 --- a/tests/system-test/2-query/columnLenUpdated.py +++ b/tests/system-test/2-query/columnLenUpdated.py @@ -202,7 +202,7 @@ class TDTestCase: if retCode != "TAOS_OK": tdLog.exit("taos -s fail") - tdSql.query("select count(*) from stb group by tg1") + tdSql.query("select count(*) from stb group by tg1 order by count(*) desc") tdSql.checkData(0, 0, 2) tdSql.checkData(1, 0, 1) diff --git a/tests/system-test/2-query/interp.py b/tests/system-test/2-query/interp.py index b6cefbe36fda9954188d59f813db9be4069a1af8..986c63839b1c6e9cfc8c8c6b857a668146d03237 100644 --- a/tests/system-test/2-query/interp.py +++ b/tests/system-test/2-query/interp.py @@ -147,6 +147,57 @@ class TDTestCase: tdSql.checkData(11, 0, 15) tdSql.checkData(12, 0, 1) + for col in col_list: + tdSql.query(f"select interp({col}) from {dbname}.{tbname} range('2020-02-01 00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, 1.0)") + tdSql.checkRows(13) + tdSql.checkData(0, 0, 1) + tdSql.checkData(1, 0, 5) + tdSql.checkData(2, 0, 1) + tdSql.checkData(3, 0, 1) + tdSql.checkData(4, 0, 1) + tdSql.checkData(5, 0, 1) + tdSql.checkData(6, 0, 10) + tdSql.checkData(7, 0, 1) + tdSql.checkData(8, 0, 1) + tdSql.checkData(9, 0, 1) + tdSql.checkData(10, 0, 1) + tdSql.checkData(11, 0, 15) + tdSql.checkData(12, 0, 1) + + for col in col_list: + tdSql.query(f"select interp({col}) from {dbname}.{tbname} range('2020-02-01 00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, true)") + tdSql.checkRows(13) + tdSql.checkData(0, 0, 1) + tdSql.checkData(1, 0, 5) + tdSql.checkData(2, 0, 1) + tdSql.checkData(3, 0, 1) + tdSql.checkData(4, 0, 1) + tdSql.checkData(5, 0, 1) + tdSql.checkData(6, 0, 10) + 
tdSql.checkData(7, 0, 1) + tdSql.checkData(8, 0, 1) + tdSql.checkData(9, 0, 1) + tdSql.checkData(10, 0, 1) + tdSql.checkData(11, 0, 15) + tdSql.checkData(12, 0, 1) + + for col in col_list: + tdSql.query(f"select interp({col}) from {dbname}.{tbname} range('2020-02-01 00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, NULL)") + tdSql.checkRows(13) + tdSql.checkData(0, 0, None) + tdSql.checkData(1, 0, 5) + tdSql.checkData(2, 0, None) + tdSql.checkData(3, 0, None) + tdSql.checkData(4, 0, None) + tdSql.checkData(5, 0, None) + tdSql.checkData(6, 0, 10) + tdSql.checkData(7, 0, None) + tdSql.checkData(8, 0, None) + tdSql.checkData(9, 0, None) + tdSql.checkData(10, 0, None) + tdSql.checkData(11, 0, 15) + tdSql.checkData(12, 0, None) + tdSql.query(f"select interp(c4) from {dbname}.{tbname} range('2020-02-01 00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, 1)") tdSql.checkRows(13) tdSql.checkData(0, 0, 1.0) @@ -163,6 +214,54 @@ class TDTestCase: tdSql.checkData(11, 0, 15.0) tdSql.checkData(12, 0, 1.0) + tdSql.query(f"select interp(c4) from {dbname}.{tbname} range('2020-02-01 00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, 1.0)") + tdSql.checkRows(13) + tdSql.checkData(0, 0, 1.0) + tdSql.checkData(1, 0, 5.0) + tdSql.checkData(2, 0, 1.0) + tdSql.checkData(3, 0, 1.0) + tdSql.checkData(4, 0, 1.0) + tdSql.checkData(5, 0, 1.0) + tdSql.checkData(6, 0, 10.0) + tdSql.checkData(7, 0, 1.0) + tdSql.checkData(8, 0, 1.0) + tdSql.checkData(9, 0, 1.0) + tdSql.checkData(10, 0, 1.0) + tdSql.checkData(11, 0, 15.0) + tdSql.checkData(12, 0, 1.0) + + tdSql.query(f"select interp(c4) from {dbname}.{tbname} range('2020-02-01 00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, true)") + tdSql.checkRows(13) + tdSql.checkData(0, 0, 1.0) + tdSql.checkData(1, 0, 5.0) + tdSql.checkData(2, 0, 1.0) + tdSql.checkData(3, 0, 1.0) + tdSql.checkData(4, 0, 1.0) + tdSql.checkData(5, 0, 1.0) + tdSql.checkData(6, 0, 10.0) + tdSql.checkData(7, 0, 1.0) + tdSql.checkData(8, 0, 1.0) + 
tdSql.checkData(9, 0, 1.0) + tdSql.checkData(10, 0, 1.0) + tdSql.checkData(11, 0, 15.0) + tdSql.checkData(12, 0, 1.0) + + tdSql.query(f"select interp(c4) from {dbname}.{tbname} range('2020-02-01 00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, NULL)") + tdSql.checkRows(13) + tdSql.checkData(0, 0, None) + tdSql.checkData(1, 0, 5.0) + tdSql.checkData(2, 0, None) + tdSql.checkData(3, 0, None) + tdSql.checkData(4, 0, None) + tdSql.checkData(5, 0, None) + tdSql.checkData(6, 0, 10.0) + tdSql.checkData(7, 0, None) + tdSql.checkData(8, 0, None) + tdSql.checkData(9, 0, None) + tdSql.checkData(10, 0, None) + tdSql.checkData(11, 0, 15.0) + tdSql.checkData(12, 0, None) + tdSql.query(f"select interp(c5) from {dbname}.{tbname} range('2020-02-01 00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, 1)") tdSql.checkRows(13) tdSql.checkData(0, 0, 1.0) @@ -179,6 +278,54 @@ class TDTestCase: tdSql.checkData(11, 0, 15.0) tdSql.checkData(12, 0, 1.0) + tdSql.query(f"select interp(c5) from {dbname}.{tbname} range('2020-02-01 00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, 1.0)") + tdSql.checkRows(13) + tdSql.checkData(0, 0, 1.0) + tdSql.checkData(1, 0, 5.0) + tdSql.checkData(2, 0, 1.0) + tdSql.checkData(3, 0, 1.0) + tdSql.checkData(4, 0, 1.0) + tdSql.checkData(5, 0, 1.0) + tdSql.checkData(6, 0, 10.0) + tdSql.checkData(7, 0, 1.0) + tdSql.checkData(8, 0, 1.0) + tdSql.checkData(9, 0, 1.0) + tdSql.checkData(10, 0, 1.0) + tdSql.checkData(11, 0, 15.0) + tdSql.checkData(12, 0, 1.0) + + tdSql.query(f"select interp(c5) from {dbname}.{tbname} range('2020-02-01 00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, true)") + tdSql.checkRows(13) + tdSql.checkData(0, 0, 1.0) + tdSql.checkData(1, 0, 5.0) + tdSql.checkData(2, 0, 1.0) + tdSql.checkData(3, 0, 1.0) + tdSql.checkData(4, 0, 1.0) + tdSql.checkData(5, 0, 1.0) + tdSql.checkData(6, 0, 10.0) + tdSql.checkData(7, 0, 1.0) + tdSql.checkData(8, 0, 1.0) + tdSql.checkData(9, 0, 1.0) + tdSql.checkData(10, 0, 1.0) + tdSql.checkData(11, 
0, 15.0) + tdSql.checkData(12, 0, 1.0) + + tdSql.query(f"select interp(c5) from {dbname}.{tbname} range('2020-02-01 00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, NULL)") + tdSql.checkRows(13) + tdSql.checkData(0, 0, None) + tdSql.checkData(1, 0, 5.0) + tdSql.checkData(2, 0, None) + tdSql.checkData(3, 0, None) + tdSql.checkData(4, 0, None) + tdSql.checkData(5, 0, None) + tdSql.checkData(6, 0, 10.0) + tdSql.checkData(7, 0, None) + tdSql.checkData(8, 0, None) + tdSql.checkData(9, 0, None) + tdSql.checkData(10, 0, None) + tdSql.checkData(11, 0, 15.0) + tdSql.checkData(12, 0, None) + tdSql.query(f"select interp(c6) from {dbname}.{tbname} range('2020-02-01 00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, 1)") tdSql.checkRows(13) tdSql.checkData(0, 0, True) @@ -195,6 +342,54 @@ class TDTestCase: tdSql.checkData(11, 0, True) tdSql.checkData(12, 0, True) + tdSql.query(f"select interp(c6) from {dbname}.{tbname} range('2020-02-01 00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, 1.0)") + tdSql.checkRows(13) + tdSql.checkData(0, 0, True) + tdSql.checkData(1, 0, True) + tdSql.checkData(2, 0, True) + tdSql.checkData(3, 0, True) + tdSql.checkData(4, 0, True) + tdSql.checkData(5, 0, True) + tdSql.checkData(6, 0, True) + tdSql.checkData(7, 0, True) + tdSql.checkData(8, 0, True) + tdSql.checkData(9, 0, True) + tdSql.checkData(10, 0, True) + tdSql.checkData(11, 0, True) + tdSql.checkData(12, 0, True) + + tdSql.query(f"select interp(c6) from {dbname}.{tbname} range('2020-02-01 00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, true)") + tdSql.checkRows(13) + tdSql.checkData(0, 0, True) + tdSql.checkData(1, 0, True) + tdSql.checkData(2, 0, True) + tdSql.checkData(3, 0, True) + tdSql.checkData(4, 0, True) + tdSql.checkData(5, 0, True) + tdSql.checkData(6, 0, True) + tdSql.checkData(7, 0, True) + tdSql.checkData(8, 0, True) + tdSql.checkData(9, 0, True) + tdSql.checkData(10, 0, True) + tdSql.checkData(11, 0, True) + tdSql.checkData(12, 0, True) + + 
tdSql.query(f"select interp(c6) from {dbname}.{tbname} range('2020-02-01 00:00:04', '2020-02-01 00:00:16') every(1s) fill(value, NULL)") + tdSql.checkRows(13) + tdSql.checkData(0, 0, None) + tdSql.checkData(1, 0, True) + tdSql.checkData(2, 0, None) + tdSql.checkData(3, 0, None) + tdSql.checkData(4, 0, None) + tdSql.checkData(5, 0, None) + tdSql.checkData(6, 0, True) + tdSql.checkData(7, 0, None) + tdSql.checkData(8, 0, None) + tdSql.checkData(9, 0, None) + tdSql.checkData(10, 0, None) + tdSql.checkData(11, 0, True) + tdSql.checkData(12, 0, None) + ## {} ... tdSql.query(f"select interp(c0) from {dbname}.{tbname} range('2020-02-01 00:00:01', '2020-02-01 00:00:04') every(1s) fill(value, 1)") tdSql.checkRows(4) @@ -2587,25 +2782,25 @@ class TDTestCase: tdSql.checkData(0, 0, '2020-02-02 00:00:00.000') - tdSql.checkData(0, 2, False) + tdSql.checkData(0, 2, None) tdSql.checkData(1, 2, False) - tdSql.checkData(2, 2, False) + tdSql.checkData(2, 2, None) tdSql.checkData(3, 2, True) - tdSql.checkData(4, 2, False) + tdSql.checkData(4, 2, None) tdSql.checkData(5, 2, False) - tdSql.checkData(6, 2, False) + tdSql.checkData(6, 2, None) tdSql.checkData(7, 2, True) - tdSql.checkData(8, 2, False) + tdSql.checkData(8, 2, None) tdSql.checkData(9, 2, True) - tdSql.checkData(10, 2, False) + tdSql.checkData(10, 2, None) tdSql.checkData(11, 2, False) - tdSql.checkData(12, 2, False) + tdSql.checkData(12, 2, None) tdSql.checkData(13, 2, False) - tdSql.checkData(14, 2, False) + tdSql.checkData(14, 2, None) tdSql.checkData(15, 2, None) - tdSql.checkData(16, 2, False) + tdSql.checkData(16, 2, None) tdSql.checkData(17, 2, None) - tdSql.checkData(18, 2, False) + tdSql.checkData(18, 2, None) tdSql.checkData(18, 0, '2020-02-02 00:00:18.000') diff --git a/tests/system-test/2-query/smaBasic.py b/tests/system-test/2-query/smaBasic.py index 43c379ee53889aa0af5410332bf9d02cfb1ca291..c221a70605064a3d23210c7a064c36657d5a9b5a 100644 --- a/tests/system-test/2-query/smaBasic.py +++ 
b/tests/system-test/2-query/smaBasic.py @@ -127,7 +127,7 @@ class TDTestCase: self.c2Sum = None # create database db - sql = f"create database db vgroups 5 replica 3" + sql = f"create database db vgroups 5 replica 3 stt_trigger 1" tdLog.info(sql) tdSql.execute(sql) sql = f"use db" diff --git a/tests/system-test/2-query/sml.py b/tests/system-test/2-query/sml.py index 2f97118fbf0893a253499ab7c9ba9425bd9e9c5d..b3aeb7219439415cb06ac4661a524466429a105d 100644 --- a/tests/system-test/2-query/sml.py +++ b/tests/system-test/2-query/sml.py @@ -15,7 +15,7 @@ sys.path.append("./7-tmq") from tmqCommon import * class TDTestCase: - updatecfgDict = {'clientCfg': {'smlChildTableName': 'dataModelName', 'fqdn': 'localhost'}, 'fqdn': 'localhost'} + updatecfgDict = {'clientCfg': {'smlChildTableName': 'dataModelName', 'fqdn': 'localhost', 'smlDot2Underline': 0}, 'fqdn': 'localhost'} print("===================: ", updatecfgDict) def init(self, conn, logSql, replicaVar=1): @@ -101,6 +101,15 @@ class TDTestCase: tdSql.query(f"desc {dbname}.macylr") tdSql.checkRows(25) + + tdSql.query(f"select * from ts3724.`.stb2`") + tdSql.checkRows(1) + + tdSql.query(f"select * from ts3724.`stb.2`") + tdSql.checkRows(1) + + tdSql.query(f"select * from ts3724.`stb2.`") + tdSql.checkRows(1) return def run(self): diff --git a/tests/system-test/2-query/sml_TS-3724.py b/tests/system-test/2-query/sml_TS-3724.py new file mode 100644 index 0000000000000000000000000000000000000000..a8b16c4662596db00a79f03eb602be7ccc33ef6c --- /dev/null +++ b/tests/system-test/2-query/sml_TS-3724.py @@ -0,0 +1,125 @@ +import taos +import sys +import time +import socket +import os +import threading + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import * +from util.common import * +sys.path.append("./7-tmq") +from tmqCommon import * + +class TDTestCase: + updatecfgDict = {'clientCfg': {'smlChildTableName': 'dataModelName', 'fqdn': 'localhost', 'smlTsDefaultName': "times"}, 'fqdn': 
'localhost'} + print("===================: ", updatecfgDict) + + def init(self, conn, logSql, replicaVar=1): + self.replicaVar = int(replicaVar) + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor(), True) + #tdSql.init(conn.cursor(), logSql) # output sql.txt file + + def checkContent(self, dbname="sml_db"): + simClientCfg="%s/taos.cfg"%tdDnodes.getSimCfgPath() + buildPath = tdCom.getBuildPath() + cmdStr = '%s/build/bin/sml_test %s'%(buildPath, simClientCfg) + print("cmdStr:", cmdStr) + tdLog.info(cmdStr) + ret = os.system(cmdStr) + if ret != 0: + tdLog.info("sml_test ret != 0") + + tdSql.query(f"select * from ts3303.stb2") + tdSql.query(f"select * from ts3303.meters") + + # tdSql.execute('use sml_db') + tdSql.query(f"select * from {dbname}.t_b7d815c9222ca64cdf2614c61de8f211") + tdSql.checkRows(1) + + tdSql.checkData(0, 0, '2016-01-01 08:00:07.000') + tdSql.checkData(0, 1, 2000) + tdSql.checkData(0, 2, 200) + tdSql.checkData(0, 3, 15) + tdSql.checkData(0, 4, 24.5208) + tdSql.checkData(0, 5, 28.09377) + tdSql.checkData(0, 6, 428) + tdSql.checkData(0, 7, 0) + tdSql.checkData(0, 8, 304) + tdSql.checkData(0, 9, 0) + tdSql.checkData(0, 10, 25) + + tdSql.query(f"select * from {dbname}.readings") + tdSql.checkRows(9) + + tdSql.query(f"select distinct tbname from {dbname}.readings") + tdSql.checkRows(4) + + tdSql.query(f"select * from {dbname}.t_0799064f5487946e5d22164a822acfc8 order by times") + tdSql.checkRows(2) + tdSql.checkData(0, 3, "kk") + tdSql.checkData(1, 3, "") + + + tdSql.query(f"select distinct tbname from {dbname}.`sys_if_bytes_out`") + tdSql.checkRows(2) + + tdSql.query(f"select * from {dbname}.t_fc70dec6677d4277c5d9799c4da806da order by times") + tdSql.checkRows(2) + tdSql.checkData(0, 1, 1.300000000) + tdSql.checkData(1, 1, 13.000000000) + + tdSql.query(f"select * from {dbname}.`sys_procs_running`") + tdSql.checkRows(1) + tdSql.checkData(0, 1, 42.000000000) + tdSql.checkData(0, 2, "web01") + + tdSql.query(f"select distinct tbname from 
{dbname}.`sys_cpu_nice`") + tdSql.checkRows(3) + + tdSql.query(f"select * from {dbname}.`sys_cpu_nice` order by times") + tdSql.checkRows(4) + tdSql.checkData(0, 1, 13.000000000) + tdSql.checkData(0, 2, "web01") + tdSql.checkData(0, 3, None) + tdSql.checkData(0, 4, "lga") + + tdSql.checkData(1, 1, 9.000000000) + tdSql.checkData(1, 2, "web02") + tdSql.checkData(3, 3, "t1") + tdSql.checkData(0, 4, "lga") + + tdSql.query(f"select * from {dbname}.macylr") + tdSql.checkRows(2) + + tdSql.query(f"select * from {dbname}.qelhxo") + tdSql.checkRows(5) + + tdSql.query(f"desc {dbname}.macylr") + tdSql.checkRows(25) + + tdSql.query(f"select * from ts3724._stb2") + tdSql.checkRows(1) + + tdSql.query(f"select * from ts3724.stb_2") + tdSql.checkRows(1) + + tdSql.query(f"select * from ts3724.stb2_") + tdSql.checkRows(1) + return + + def run(self): + tdSql.prepare() + self.checkContent() + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) + diff --git a/tests/system-test/5-taos-tools/taosbenchmark/insertMix.py b/tests/system-test/5-taos-tools/taosbenchmark/insertMix.py index 60daa8cdc27f5e683239d6722e4f28ae1b8d90d1..b4046b8c98b311d8f73fc811cf4e70a2d2510e38 100644 --- a/tests/system-test/5-taos-tools/taosbenchmark/insertMix.py +++ b/tests/system-test/5-taos-tools/taosbenchmark/insertMix.py @@ -79,6 +79,11 @@ class TDTestCase: tdSql.query("select count(*) from (select * from meters order by ts desc)") tdSql.checkData(0, 0, allCnt) + rowCnt = tdSql.query("select tbname, count(*) from meters partition by tbname slimit 11") + if rowCnt != 10: + tdLog.exit("partition by tbname should return 10 rows of table data which is " + str(rowCnt)) + return + def run(self): binPath = self.getPath() diff --git a/tests/system-test/6-cluster/5dnode3mnodeRoll.py b/tests/system-test/6-cluster/5dnode3mnodeRoll.py new file mode 100644 index 
0000000000000000000000000000000000000000..8d7d4fb3e52c5ab733a385601dc10f4aacba6701 --- /dev/null +++ b/tests/system-test/6-cluster/5dnode3mnodeRoll.py @@ -0,0 +1,347 @@ +from ssl import ALERT_DESCRIPTION_CERTIFICATE_UNOBTAINABLE +from numpy import row_stack +import taos +import sys +import time +import os + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import TDDnodes +from util.dnodes import TDDnode +from util.cluster import * +from util.common import * +sys.path.append("./6-cluster") +from clusterCommonCreate import * +from clusterCommonCheck import clusterComCheck +from pathlib import Path +from taos.tmq import Consumer + + +import time +import socket +import subprocess +from multiprocessing import Process +import threading +import time +import json + +BASEVERSION = "3.0.7.0" + +class TDTestCase: + + def init(self, conn, logSql, replicaVar=1): + tdLog.debug(f"start to excute {__file__}") + self.TDDnodes = None + tdSql.init(conn.cursor()) + self.host = socket.gethostname() + self.replicaVar = int(replicaVar) + + def checkProcessPid(self,processName): + i=0 + while i<60: + print(f"wait stop {processName}") + processPid = subprocess.getstatusoutput(f'ps aux|grep {processName} |grep -v "grep"|awk \'{{print $2}}\'')[1] + print(f"times:{i},{processName}-pid:{processPid}") + if(processPid == ""): + break + i += 1 + sleep(1) + else: + print(f'this processName is not stoped in 60s') + + + def getBuildPath(self): + selfPath = os.path.dirname(os.path.realpath(__file__)) + + if ("community" in selfPath): + projPath = selfPath[:selfPath.find("community")] + else: + projPath = selfPath[:selfPath.find("tests")] + + for root, dirs, files in os.walk(projPath): + if ("taosd" in files or "taosd.exe" in files): + rootRealPath = os.path.dirname(os.path.realpath(root)) + if ("packaging" not in rootRealPath): + buildPath = root[:len(root)-len("/build/bin")] + break + return buildPath + + def getCfgPath(self): + buildPath = 
self.getBuildPath() + selfPath = os.path.dirname(os.path.realpath(__file__)) + + if ("community" in selfPath): + cfgPath = buildPath + "/../sim/dnode1/cfg/" + else: + cfgPath = buildPath + "/../sim/dnode1/cfg/" + + return cfgPath + + def installTaosd(self,bPath,cPath): + # os.system(f"rmtaos && mkdir -p {self.getBuildPath()}/build/lib/temp && mv {self.getBuildPath()}/build/lib/libtaos.so* {self.getBuildPath()}/build/lib/temp/ ") + # os.system(f" mv {bPath}/build {bPath}/build_bak ") + # os.system(f"mv {self.getBuildPath()}/build/lib/libtaos.so {self.getBuildPath()}/build/lib/libtaos.so_bak ") + # os.system(f"mv {self.getBuildPath()}/build/lib/libtaos.so.1 {self.getBuildPath()}/build/lib/libtaos.so.1_bak ") + + packagePath = "/usr/local/src/" + dataPath = cPath + "/../data/" + packageName = "TDengine-server-"+ BASEVERSION + "-Linux-x64.tar.gz" + packageTPath = packageName.split("-Linux-")[0] + my_file = Path(f"{packagePath}/{packageName}") + if not my_file.exists(): + print(f"{packageName} is not exists") + tdLog.info(f"cd {packagePath} && wget https://www.tdengine.com/assets-download/3.0/{packageName}") + os.system(f"cd {packagePath} && wget https://www.tdengine.com/assets-download/3.0/{packageName}") + else: + print(f"{packageName} has been exists") + os.system(f" cd {packagePath} && tar xvf {packageName} && cd {packageTPath} && ./install.sh -e no " ) + # tdDnodes.stop(1) + # print(f"start taosd: rm -rf {dataPath}/* && nohup taosd -c {cPath} & ") + # os.system(f"rm -rf {dataPath}/* && nohup taosd -c {cPath} & " ) + # sleep(5) + + + def buildTaosd(self,bPath): + # os.system(f"mv {bPath}/build_bak {bPath}/build ") + os.system(f" cd {bPath}/ && make install ") + + def is_list_same_as_ordered_list(self,unordered_list, ordered_list): + sorted_list = sorted(unordered_list) + return sorted_list == ordered_list + + def insertAllData(self,cPath,dbname,tableNumbers,recordNumbers): + tdLog.info(f"insertAllData") + # tdLog.info(f" LD_LIBRARY_PATH=/usr/lib taosBenchmark -d 
dbtest -t {tableNumbers} -c {cPath} -n {recordNumbers} -v 2 -a 3 -y -k 10 -z 5 ") + # os.system(f"LD_LIBRARY_PATH=/usr/lib taosBenchmark -d dbtest -t {tableNumbers} -c {cPath} -n {recordNumbers} -v 2 -a 3 -y -k 10 -z 5 ") + + print(f"sed -i 's/\"cfgdir\".*/\"cfgdir\": \"{cPath}\",/' 6-cluster/rollup.json && sed -i '0,/\"name\":.*/s/\"name\":.*/\"name\": \"{dbname}\",/' 6-cluster/rollup.json && sed -i 's/\"childtable_count\":.*/\"childtable_count\": {tableNumbers},/' 6-cluster/rollup.json && sed -i 's/\"insert_rows\":.*/\"insert_rows\": {recordNumbers},/' 6-cluster/rollup.json" ) + os.system(f"sed -i 's/\"cfgdir\".*/\"cfgdir\": \"{cPath}\",/' 6-cluster/rollup.json && sed -i '0,/\"name\":.*/s/\"name\":.*/\"name\": \"{dbname}\",/' 6-cluster/rollup.json && sed -i 's/\"childtable_count\":.*/\"childtable_count\": {tableNumbers},/' 6-cluster/rollup.json && sed -i 's/\"insert_rows\":.*/\"insert_rows\": {recordNumbers},/' 6-cluster/rollup.json") + print("LD_LIBRARY_PATH=/usr/lib taosBenchmark -f 6-cluster/rollup.json -y -k 10 -z 5") + os.system("LD_LIBRARY_PATH=/usr/lib taosBenchmark -f 6-cluster/rollup.json -y -k 10 -z 5 ") + + + def insertData(self,countstart,countstop): + # fisrt add data : db\stable\childtable\general table + + for couti in range(countstart,countstop): + tdLog.debug("drop database if exists db%d" %couti) + tdSql.execute("drop database if exists db%d" %couti) + print("create database if not exists db%d replica 1 duration 300" %couti) + tdSql.execute("create database if not exists db%d replica 1 duration 300" %couti) + tdSql.execute("use db%d" %couti) + tdSql.execute( + '''create table stb1 + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp) + tags (t1 int) + ''' + ) + tdSql.execute( + ''' + create table t1 + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp) + ''' + ) + for i in range(4): + 
tdSql.execute(f'create table ct{i+1} using stb1 tags ( {i+1} )') + + + def fiveDnodeThreeMnode(self,dnodeNumbers,mnodeNums,restartNumbers,stopRole): + tdLog.printNoPrefix("======== test case 1: ") + paraDict = {'dbName': 'db0_0', + 'dropFlag': 1, + 'event': '', + 'vgroups': 4, + 'replica': 1, + 'stbName': 'stb', + 'stbNumbers': 2, + 'colPrefix': 'c', + 'tagPrefix': 't', + 'colSchema': [{'type': 'INT', 'count':1}, {'type': 'binary', 'len':20, 'count':1}], + 'tagSchema': [{'type': 'INT', 'count':1}, {'type': 'binary', 'len':20, 'count':1}], + 'ctbPrefix': 'ctb', + 'ctbNum': 200, + 'startTs': 1640966400000, # 2022-01-01 00:00:00.000 + "rowsPerTbl": 1000, + "batchNum": 5000 + } + hostname = socket.gethostname() + dnodeNumbers=int(dnodeNumbers) + + tdLog.info("first check dnode and mnode") + tdSql=tdCom.newTdSql() + tdSql.query("select * from information_schema.ins_dnodes;") + tdSql.checkData(0,1,'%s:6030'%self.host) + + tdLog.printNoPrefix(f"==========step1:prepare cluster of {dnodeNumbers} dnodes whith old version-{BASEVERSION} ") + + scriptsPath = os.path.dirname(os.path.realpath(__file__)) + distro_id = distro.id() + if distro_id == "alpine": + tdLog.info(f"alpine skip Roll test") + return True + if platform.system().lower() == 'windows': + tdLog.info(f"Windows skip Roll test") + return True + + tdLog.info("====step1.1:stop all taosd and clear data dir,then start all old taosd ====") + + bPath = self.getBuildPath() + cPath = self.getCfgPath() + tdDnodes=cluster.dnodes + for i in range(dnodeNumbers): + tdDnodes[i].stoptaosd() + self.installTaosd(bPath,cPath) + for i in range(dnodeNumbers): + dnode_cfgPath = tdDnodes[i].cfgDir + dnode_dataPath = tdDnodes[i].dataDir + os.system(f"rm -rf {dnode_dataPath}/* && nohup taosd -c {dnode_cfgPath} & ") + + tdLog.info("====step1.2: create dnode on cluster ====") + + for i in range(1,dnodeNumbers): + dnode_id = tdDnodes[i].cfgDict["fqdn"] + ":" + tdDnodes[i].cfgDict["serverPort"] + os.system(f" LD_LIBRARY_PATH=/usr/lib taos -s 
'create dnode \"{dnode_id}\" ' ") + sleep(5) + os.system(" LD_LIBRARY_PATH=/usr/lib taos -s 'show dnodes' ") + + for i in range(2,dnodeNumbers+1): + os.system(f" LD_LIBRARY_PATH=/usr/lib taos -s 'create mnode on dnode {i} ' ") + sleep(10) + os.system(" LD_LIBRARY_PATH=/usr/lib taos -s 'show mnodes' ") + + tdLog.info("====step1.3: insert data, includes time data, tmq and stream ====") + tableNumbers1=100 + recordNumbers1=100000 + recordNumbers2=1000 + + dbname = "dbtest" + stb = f"{dbname}.meters" + cPath_temp=cPath.replace("/","\/") + + # os.system("echo 'debugFlag 143' > /etc/taos/taos.cfg ") + # create database and tables + print(f"sed -i 's/\"cfgdir\".*/\"cfgdir\": \"{cPath_temp}\",/' 6-cluster/rollup_db.json && sed -i '0,/\"name\":.*/s/\"name\":.*/\"name\": \"{dbname}\",/' 6-cluster/rollup_db.json ") + os.system(f"sed -i 's/\"cfgdir\".*/\"cfgdir\": \"{cPath_temp}\",/' 6-cluster/rollup_db.json && sed -i '0,/\"name\":.*/s/\"name\":.*/\"name\": \"{dbname}\",/' 6-cluster/rollup_db.json") + print("LD_LIBRARY_PATH=/usr/lib taosBenchmark -f 6-cluster/rollup_db.json -y ") + os.system("LD_LIBRARY_PATH=/usr/lib taosBenchmark -f 6-cluster/rollup_db.json -y") + # insert data + tdLog.info(f" LD_LIBRARY_PATH=/usr/lib taosBenchmark -d test -t {tableNumbers1} -c {cPath} -n {recordNumbers2} -v 2 -a 3 -y -k 10 -z 5 ") + os.system(f"LD_LIBRARY_PATH=/usr/lib taosBenchmark -d test -t {tableNumbers1} -c {cPath} -n {recordNumbers2} -v 2 -a 3 -y -k 10 -z 5 ") + + # os.system(f"LD_LIBRARY_PATH=/usr/lib taos -s 'use test;create stream current_stream into current_stream_output_stb as select _wstart as `start`, _wend as wend, max(current) as max_current from meters where voltage <= 220 interval (5s);' ") + # os.system(f'LD_LIBRARY_PATH=/usr/lib taos -s "use test;create stream power_stream into power_stream_output_stb as select ts, concat_ws(\\".\\", location, tbname) as meter_location, current*voltage*cos(phase) as active_power, current*voltage*sin(phase) as reactive_power from meters 
partition by tbname;" ') + # os.system(f'LD_LIBRARY_PATH=/usr/lib taos -s "use test;show streams;" ') + os.system(f'LD_LIBRARY_PATH=/usr/lib taos -s "alter database test WAL_RETENTION_PERIOD 1000" ') + os.system(f'LD_LIBRARY_PATH=/usr/lib taos -s "create topic if not exists tmq_test_topic as select current,voltage,phase from test.meters where voltage <= 106 and current <= 5;" ') + os.system(f'LD_LIBRARY_PATH=/usr/lib taos -s "use test;show topics;" ') + + print(f"sed -i 's/\"cfgdir\".*/\"cfgdir\": \"{cPath_temp}\",/' 0-others/compa4096.json ") + os.system(f"sed -i 's/\"cfgdir\".*/\"cfgdir\": \"{cPath_temp}\",/'0-others/compa4096.json ") + tdLog.info(" LD_LIBRARY_PATH=/usr/lib taosBenchmark -f 0-others/compa4096.json -y -k 10 -z 5 ") + os.system("LD_LIBRARY_PATH=/usr/lib taosBenchmark -f 0-others/compa4096.json -y -k 10 -z 5 ") + os.system("LD_LIBRARY_PATH=/usr/lib taos -s 'flush database db4096 '") + os.system("LD_LIBRARY_PATH=/usr/lib taos -f 0-others/TS-3131.tsql") + # self.buildTaosd(bPath) + + threads=[] + threads.append(threading.Thread(target=self.insertAllData, args=(cPath_temp,dbname,tableNumbers1,recordNumbers1))) + for tr in threads: + tr.start() + # when inserting data porcess has been started up ,we can upgrade taosd + sleep(5) + tdLog.printNoPrefix("==========step2:start to rolling upgdade ") + for i in range(dnodeNumbers): + tdDnodes[i].running = 1 + tdDnodes[i].stoptaosd() + sleep(2) + tdDnodes[i].starttaosd() + + for tr in threads: + tr.join() + + tdLog.printNoPrefix(f"==========step3:check dnode status ") + # wait 10s for taosd cluster ready + sleep(10) + tdsql=tdCom.newTdSql() + tdsql.query("select * from information_schema.ins_dnodes;") + tdLog.info(tdsql.queryResult) + tdsql.checkData(2,1,'%s:6230'%self.host) + clusterComCheck.checkDnodes(dnodeNumbers) + + tdsql1=tdCom.newTdSql() + tdsql1.query(f"SELECT SERVER_VERSION();") + nowServerVersion=tdsql1.queryResult[0][0] + tdLog.printNoPrefix(f"==========step4:prepare and check data in new 
version-{nowServerVersion}") + + tdLog.info(f"New server version is {nowServerVersion}") + tdsql1.query(f"SELECT CLIENT_VERSION();") + nowClientVersion=tdsql1.queryResult[0][0] + tdLog.info(f"New client version is {nowClientVersion}") + + tdsql1.query(f"select count(*) from {stb}") + tdsql1.checkData(0,0,tableNumbers1*recordNumbers1) + tdsql1.query(f"select count(*) from db4096.stb0") + tdsql1.checkData(0,0,50000) + + # tdsql1.query("show streams;") + # tdsql1.checkRows(2) + tdsql1.query("select *,tbname from d0.almlog where mcid='m0103';") + tdsql1.checkRows(6) + expectList = [0,3003,20031,20032,20033,30031] + resultList = [] + for i in range(6): + resultList.append(tdsql1.queryResult[i][3]) + print(resultList) + if self.is_list_same_as_ordered_list(resultList,expectList): + print("The unordered list is the same as the ordered list.") + else: + tdlog.error("The unordered list is not the same as the ordered list.") + tdsql1.execute(f"insert into test.d80 values (now+1s, 11, 103, 0.21);") + tdsql1.execute(f"insert into test.d9 values (now+5s, 4.3, 104, 0.4);") + + conn = taos.connect() + + consumer = Consumer( + { + "group.id": "tg75", + "client.id": "124", + "td.connect.user": "root", + "td.connect.pass": "taosdata", + "enable.auto.commit": "true", + "experimental.snapshot.enable": "true", + } + ) + consumer.subscribe(["tmq_test_topic"]) + + while True: + res = consumer.poll(10) + if not res: + break + err = res.error() + if err is not None: + raise err + val = res.value() + + for block in val: + print(block.fetchall()) + tdsql1.query("show topics;") + tdsql1.checkRows(1) + + + # #check mnode status + # tdLog.info("check mnode status") + # clusterComCheck.checkMnodeStatus(mnodeNums) + + + def run(self): + # print(self.master_dnode.cfgDict) + self.fiveDnodeThreeMnode(dnodeNumbers=3,mnodeNums=3,restartNumbers=2,stopRole='dnode') + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +tdCases.addLinux(__file__, TDTestCase()) 
+tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/system-test/6-cluster/clusterCommonCheck.py b/tests/system-test/6-cluster/clusterCommonCheck.py index 7aa2ba06b9a38590bdb252b05891f9c6fa391640..439f0b6b8c93ee78e0fe1f2341a8426715c3ddab 100644 --- a/tests/system-test/6-cluster/clusterCommonCheck.py +++ b/tests/system-test/6-cluster/clusterCommonCheck.py @@ -256,12 +256,12 @@ class ClusterComCheck: if vgroup_status_first.count('leader') == 1 and vgroup_status_first.count('follower') == 2: if vgroup_status_last.count('leader') == 1 and vgroup_status_last.count('follower') == 2: ready_time= (count + 1) - tdLog.success(f"elections of {db_name} all vgroups are ready in {ready_time} s") + tdLog.success(f"elections of {db_name}.vgroups are ready in {ready_time} s") return True count+=1 else: tdLog.debug(tdSql.queryResult) - tdLog.notice(f"elections of {db_name} all vgroups are failed in{count}s ") + tdLog.notice(f"elections of {db_name} all vgroups are failed in{count} s ") caller = inspect.getframeinfo(inspect.stack()[1][0]) args = (caller.filename, caller.lineno) tdLog.exit("%s(%d) failed " % args) diff --git a/tests/system-test/6-cluster/manually-test/6dnode3mnodeInsertLessDataAlterRep3to1to3.py b/tests/system-test/6-cluster/manually-test/6dnode3mnodeInsertLessDataAlterRep3to1to3.py index fede19ca3ae2adee02acc00b88a71b860da0fddc..16ad3506c86c229859eabe2fa160e9dff3a71029 100644 --- a/tests/system-test/6-cluster/manually-test/6dnode3mnodeInsertLessDataAlterRep3to1to3.py +++ b/tests/system-test/6-cluster/manually-test/6dnode3mnodeInsertLessDataAlterRep3to1to3.py @@ -182,7 +182,7 @@ class TDTestCase: tdLog.info(f"show transactions;alter database db0_0 replica {replica3};") TdSqlEx.execute(f'show transactions;') TdSqlEx.execute(f'alter database db0_0 replica {replica3};') - clusterComCheck.check_vgroups_status(vgroup_numbers=paraDict["vgroups"],db_replica=replica3,db_name=paraDict["dbName"],count_number=120) + 
clusterComCheck.check_vgroups_status(vgroup_numbers=paraDict["vgroups"],db_replica=replica3,db_name=paraDict["dbName"],count_number=180) def run(self): # print(self.master_dnode.cfgDict) diff --git a/tests/system-test/6-cluster/rollup.json b/tests/system-test/6-cluster/rollup.json new file mode 100644 index 0000000000000000000000000000000000000000..02669acb937ea9e9575adda203c2a744bea66548 --- /dev/null +++ b/tests/system-test/6-cluster/rollup.json @@ -0,0 +1,77 @@ +{ + "filetype": "insert", + "cfgdir": "/home/chr/TDengine/debug/../sim/dnode1/cfg/", + "host": "localhost", + "port": 6030, + "rest_port": 6041, + "user": "root", + "password": "taosdata", + "thread_count": 4, + "create_table_thread_count": 4, + "result_file": "taosBenchmark_result.log", + "confirm_parameter_prompt": "no", + "insert_interval": 0, + "num_of_records_per_req": 1000, + "max_sql_len": 1024000, + "databases": [ + { + "dbinfo": { + "name": "dbtest", + "drop": "no", + "replica": 1, + "duration": 10, + "precision": "ms", + "keep": 3650, + "comp": 2, + "vgroups": 2, + "buffer": 1000 + }, + "super_tables": [ + { + "name": "meters", + "child_table_exists": "yes", + "childtable_count": 100, + "childtable_prefix": "ctb", + "escape_character": "no", + "auto_create_table": "no", + "batch_create_tbl_num": 500, + "data_source": "rand", + "insert_mode": "taosc", + "continue_if_fail": "yes", + "keep_trying": 500, + "trying_interval": 100, + "interlace_rows": 0, + "line_protocol": null, + "tcp_transfer": "no", + "insert_rows": 100000, + "childtable_limit": 0, + "childtable_offset": 0, + "rows_per_tbl": 0, + "max_sql_len": 1048576, + "disorder_ratio": 0, + "disorder_range": 1000, + "timestamp_step": 1000, + "start_timestamp": "2022-10-22 17:20:36", + "sample_format": "csv", + "sample_file": "./sample.csv", + "tags_file": "", + "partial_col_num": 999, + "columns": [{"type": "TIMESTAMP","max": 10, "min": 0},{"type": "INT","max": 10, "min": 0}, {"type": "BIGINT","max": 10, "min": 0}, {"type": "FLOAT","max": 10, 
"min": 0}, {"type": "DOUBLE","max": 10, "min": 0}, {"type": "SMALLINT","max": 10, "min": 0}, {"type": "TINYINT","max": 10, "min": 0}, {"type": "BOOL","max": 10, "min": 0}, {"type": "NCHAR","len": 29, "count":1, + "values": ["d1", "d2"] + }, {"type": "UINT","max": 10, "min": 0}, {"type": "UBIGINT","max": 10, "min": 0}, {"type": "UTINYINT","max": 10, "min": 0}, {"type": "USMALLINT","max": 10, "min": 0}, {"type": "BINARY", "len": 23, "count":1, + "values": ["b1","b2"] + }], + "tags": [{"type": "TIMESTAMP","max": 10, "min": 0},{"type": "INT","max": 10, "min": 0}, {"type": "BIGINT","max": 10, "min": 0}, {"type": "FLOAT","max": 10, "min": 0}, {"type": "DOUBLE","max": 10, "min": 0}, {"type": "SMALLINT","max": 10, "min": 0}, {"type": "TINYINT","max": 10, "min": 0}, {"type": "BOOL","max": 10, "min": 0}, {"type": "NCHAR","len": 17, "count":1, + "values": ["d1", "d2"] + }, {"type": "UINT","max": 10, "min": 0}, {"type": "UBIGINT","max": 10, "min": 0}, {"type": "UTINYINT","max": 10, "min": 0}, {"type": "USMALLINT","max": 10, "min": 0}, {"type": "BINARY", "len": 19, "count":1, + "values": ["b1","b2"] + }] + } + ] + } + ], + "prepare_rand": 10000, + "chinese": "no", + "streams": false, + "test_log": "/root/testlog/" +} diff --git a/tests/system-test/6-cluster/rollup_db.json b/tests/system-test/6-cluster/rollup_db.json new file mode 100644 index 0000000000000000000000000000000000000000..fedc47024cc3a2dbaf9c9146c1fcef0c7e454d9d --- /dev/null +++ b/tests/system-test/6-cluster/rollup_db.json @@ -0,0 +1,77 @@ +{ + "filetype": "insert", + "cfgdir": "/home/chr/TDengine/debug/../sim/dnode1/cfg/", + "host": "localhost", + "port": 6030, + "rest_port": 6041, + "user": "root", + "password": "taosdata", + "thread_count": 4, + "create_table_thread_count": 4, + "result_file": "taosBenchmark_result.log", + "confirm_parameter_prompt": "no", + "insert_interval": 0, + "num_of_records_per_req": 1000, + "max_sql_len": 1024000, + "databases": [ + { + "dbinfo": { + "name": "dbtest", + "drop": "yes", + 
"replica": 1, + "duration": 10, + "precision": "ms", + "keep": 3650, + "comp": 2, + "vgroups": 2, + "buffer": 1000 + }, + "super_tables": [ + { + "name": "meters", + "child_table_exists": "no", + "childtable_count": 100, + "childtable_prefix": "ctb", + "escape_character": "no", + "auto_create_table": "no", + "batch_create_tbl_num": 500, + "data_source": "rand", + "insert_mode": "taosc", + "continue_if_fail": "yes", + "keep_trying": 500, + "trying_interval": 100, + "interlace_rows": 0, + "line_protocol": null, + "tcp_transfer": "no", + "insert_rows": 0, + "childtable_limit": 0, + "childtable_offset": 0, + "rows_per_tbl": 0, + "max_sql_len": 1048576, + "disorder_ratio": 0, + "disorder_range": 1000, + "timestamp_step": 1000, + "start_timestamp": "2022-10-22 17:20:36", + "sample_format": "csv", + "sample_file": "./sample.csv", + "tags_file": "", + "partial_col_num": 999, + "columns": [{"type": "TIMESTAMP","max": 10, "min": 0},{"type": "INT","max": 10, "min": 0}, {"type": "BIGINT","max": 10, "min": 0}, {"type": "FLOAT","max": 10, "min": 0}, {"type": "DOUBLE","max": 10, "min": 0}, {"type": "SMALLINT","max": 10, "min": 0}, {"type": "TINYINT","max": 10, "min": 0}, {"type": "BOOL","max": 10, "min": 0}, {"type": "NCHAR","len": 29, "count":1, + "values": ["d1", "d2"] + }, {"type": "UINT","max": 10, "min": 0}, {"type": "UBIGINT","max": 10, "min": 0}, {"type": "UTINYINT","max": 10, "min": 0}, {"type": "USMALLINT","max": 10, "min": 0}, {"type": "BINARY", "len": 23, "count":1, + "values": ["b1","b2"] + }], + "tags": [{"type": "TIMESTAMP","max": 10, "min": 0},{"type": "INT","max": 10, "min": 0}, {"type": "BIGINT","max": 10, "min": 0}, {"type": "FLOAT","max": 10, "min": 0}, {"type": "DOUBLE","max": 10, "min": 0}, {"type": "SMALLINT","max": 10, "min": 0}, {"type": "TINYINT","max": 10, "min": 0}, {"type": "BOOL","max": 10, "min": 0}, {"type": "NCHAR","len": 17, "count":1, + "values": ["d1", "d2"] + }, {"type": "UINT","max": 10, "min": 0}, {"type": "UBIGINT","max": 10, "min": 0}, 
{"type": "UTINYINT","max": 10, "min": 0}, {"type": "USMALLINT","max": 10, "min": 0}, {"type": "BINARY", "len": 19, "count":1, + "values": ["b1","b2"] + }] + } + ] + } + ], + "prepare_rand": 10000, + "chinese": "no", + "streams": false, + "test_log": "/root/testlog/" +} diff --git a/tests/system-test/7-tmq/basic5.py b/tests/system-test/7-tmq/basic5.py index 080b431ffe54f43e725e22d84a3f365f8bfc4cea..a16f2c348fdd925e8eadd1558ef12de926e64f92 100644 --- a/tests/system-test/7-tmq/basic5.py +++ b/tests/system-test/7-tmq/basic5.py @@ -149,7 +149,6 @@ class TDTestCase: topicFromStb = 'topic_stb_column' topicFromCtb = 'topic_ctb_column' - tdSql.execute("alter database %s wal_retention_period 3600" % (parameterDict['dbName'])) tdSql.execute("create topic %s as select ts, c1, c2 from %s.%s" %(topicFromStb, parameterDict['dbName'], parameterDict['stbName'])) tdSql.execute("create topic %s as select ts, c1, c2 from %s.%s_0" %(topicFromCtb, parameterDict['dbName'], parameterDict['stbName'])) diff --git a/tests/system-test/7-tmq/create_wrong_topic.py b/tests/system-test/7-tmq/create_wrong_topic.py index 77d43149cd611225d2ee8f7b017013b0fc784bfb..66814d22ea05578408793132a9ba3cd3df16a4db 100644 --- a/tests/system-test/7-tmq/create_wrong_topic.py +++ b/tests/system-test/7-tmq/create_wrong_topic.py @@ -44,7 +44,6 @@ class TDTestCase: def wrong_topic(self): tdSql.prepare() tdSql.execute('use db') - tdSql.execute('alter database db wal_retention_period 3600') stbname = f'db.{tdCom.getLongName(5, "letters")}' tag_dict = { 't0':'int' diff --git a/tests/system-test/7-tmq/dataFromTsdbNWal-multiCtb.py b/tests/system-test/7-tmq/dataFromTsdbNWal-multiCtb.py index 44f58279be2df9cc97ee504babebbbfda1793f44..808a4935e30ccec4d06863e54e5e2ff3e1ba635d 100644 --- a/tests/system-test/7-tmq/dataFromTsdbNWal-multiCtb.py +++ b/tests/system-test/7-tmq/dataFromTsdbNWal-multiCtb.py @@ -67,7 +67,6 @@ class TDTestCase: tdLog.info("flush db to let data falls into the disk") tdSql.query("flush database 
%s"%(paraDict['dbName'])) - tdSql.execute("alter database %s wal_retention_period 3600"%(paraDict['dbName'])) return def tmqCase1(self): diff --git a/tests/system-test/7-tmq/dataFromTsdbNWal.py b/tests/system-test/7-tmq/dataFromTsdbNWal.py index 0f4f1694c17701d9d4361c788980215ae418db5e..8386c22cd0430128a1b873f3f5150ef51704b245 100644 --- a/tests/system-test/7-tmq/dataFromTsdbNWal.py +++ b/tests/system-test/7-tmq/dataFromTsdbNWal.py @@ -67,7 +67,6 @@ class TDTestCase: tdLog.info("flush db to let data falls into the disk") tdSql.query("flush database %s"%(paraDict['dbName'])) - tdSql.execute("alter database %s wal_retention_period 3600"%(paraDict['dbName'])) return def tmqCase1(self): diff --git a/tests/system-test/7-tmq/dropDbR3ConflictTransaction.py b/tests/system-test/7-tmq/dropDbR3ConflictTransaction.py index 7d11684ed81fd79584253e032f67cae1bd833de8..e25fb412af44231e0a0a5deb672ed44a15b1e879 100644 --- a/tests/system-test/7-tmq/dropDbR3ConflictTransaction.py +++ b/tests/system-test/7-tmq/dropDbR3ConflictTransaction.py @@ -106,7 +106,6 @@ class TDTestCase: paraDict['ctbNum'] = self.ctbNum paraDict['rowsPerTbl'] = self.rowsPerTbl - tdSql.execute("alter database dbt wal_retention_period 3600") tdLog.info("create topics from stb1") topicFromStb1 = 'topic_stb1' queryString = "select ts, c1, c2 from %s.%s where t4 == 'beijing' or t4 == 'changsha' "%(paraDict['dbName'], paraDict['stbName']) diff --git a/tests/system-test/7-tmq/stbFilter.py b/tests/system-test/7-tmq/stbFilter.py index 3f862ae0472f6a1312ac5f2ea680adbd4b95f172..6b48a6d57076b4baf54d7c08db7cd8a625fab29c 100644 --- a/tests/system-test/7-tmq/stbFilter.py +++ b/tests/system-test/7-tmq/stbFilter.py @@ -45,7 +45,6 @@ class TDTestCase: tmqCom.initConsumerTable() tmqCom.create_database(tsql=tdSql, dbName=paraDict["dbName"],dropFlag=paraDict["dropFlag"], vgroups=paraDict['vgroups'],replica=paraDict['replica']) - tdSql.execute("alter database %s wal_retention_period 3600"%(paraDict["dbName"])) tdLog.info("create stb") 
tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("create ctb") diff --git a/tests/system-test/7-tmq/stbFilterWhere.py b/tests/system-test/7-tmq/stbFilterWhere.py index 8d8d046cef14dae6e4e01828b050201fcb575185..9b3e8b77109c7064f8920cbc2b964e17e6ed39dd 100644 --- a/tests/system-test/7-tmq/stbFilterWhere.py +++ b/tests/system-test/7-tmq/stbFilterWhere.py @@ -45,7 +45,6 @@ class TDTestCase: tmqCom.initConsumerTable() tmqCom.create_database(tsql=tdSql, dbName=paraDict["dbName"],dropFlag=paraDict["dropFlag"], vgroups=paraDict['vgroups'],replica=paraDict['replica']) - tdSql.execute("alter database %s wal_retention_period 3600"%(paraDict["dbName"])) tdLog.info("create stb") tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("create ctb") diff --git a/tests/system-test/7-tmq/stbTagFilter-1ctb.py b/tests/system-test/7-tmq/stbTagFilter-1ctb.py index 1867dc54cb3744077f477f976f88edf59f744dc3..1e6011d5db14f98b8bf29b60812a0c3eb880cb62 100644 --- a/tests/system-test/7-tmq/stbTagFilter-1ctb.py +++ b/tests/system-test/7-tmq/stbTagFilter-1ctb.py @@ -106,7 +106,6 @@ class TDTestCase: # ctbNum=paraDict["ctbNum"],rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"], # startTs=paraDict["startTs"],ctbStartIdx=paraDict['ctbStartIdx']) - tdSql.execute("alter database dbt wal_retention_period 3600") tdLog.info("create topics from stb1") topicFromStb1 = 'topic_UpperCase_stb1' # queryString = "select ts, c1, c2 from %s.%s where t4 == 'shanghai' or t4 == 'changsha'"%(paraDict['dbName'], paraDict['stbName']) diff --git a/tests/system-test/7-tmq/stbTagFilter-multiCtb.py b/tests/system-test/7-tmq/stbTagFilter-multiCtb.py index 67cc60d196bfd31f75e71978145f58ac40b698c7..5043c46f00dfda378d750357659504f2ecc542b6 100644 --- a/tests/system-test/7-tmq/stbTagFilter-multiCtb.py +++ b/tests/system-test/7-tmq/stbTagFilter-multiCtb.py @@ -54,7 +54,6 @@ class TDTestCase: tmqCom.initConsumerTable() 
tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" %(paraDict['dbName'])) tdLog.info("create stb") tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("create ctb") diff --git a/tests/system-test/7-tmq/subscribeDb4.py b/tests/system-test/7-tmq/subscribeDb4.py index 764362c70882e0851d3cd5eb80ac4f0096a71113..c14d3b27b11fa8e36bd216f23a2371018358aaf0 100644 --- a/tests/system-test/7-tmq/subscribeDb4.py +++ b/tests/system-test/7-tmq/subscribeDb4.py @@ -65,7 +65,6 @@ class TDTestCase: tmqCom.initConsumerTable(self.cdbName) tdCom.create_database(tdSql,self.paraDict["dbName"],self.paraDict["dropFlag"]) - tdSql.execute("alter database %s wal_retention_period 3600" % (self.paraDict['dbName'])) self.paraDict["stbName"] = 'stb1' tdCom.create_stable(tdSql,dbname=self.paraDict["dbName"],stbname=self.paraDict["stbName"],column_elm_list=self.paraDict["colSchema"],tag_elm_list=self.paraDict["tagSchema"],count=1, default_stbname_prefix=self.paraDict["stbName"]) diff --git a/tests/system-test/7-tmq/tmq3mnodeSwitch.py b/tests/system-test/7-tmq/tmq3mnodeSwitch.py index 0740830696987a6009db020de14bdc768d56c69a..8c5dc5e693a43fd6d7a4ea724db5bdfa0fd7b8a4 100644 --- a/tests/system-test/7-tmq/tmq3mnodeSwitch.py +++ b/tests/system-test/7-tmq/tmq3mnodeSwitch.py @@ -172,7 +172,6 @@ class TDTestCase: tdLog.info("async insert data") pThread = tmqCom.asyncInsertData(paraDict) - tdSql.execute("alter database %s wal_retention_period 3600" %(paraDict['dbName'])) tdLog.info("create topics from stb with filter") # queryString = "select ts, log(c1), ceil(pow(c1,3)) from %s.%s where c1 %% 7 == 0" %(paraDict['dbName'], paraDict['stbName']) diff --git a/tests/system-test/7-tmq/tmqAlterSchema.py b/tests/system-test/7-tmq/tmqAlterSchema.py index 1a8b0693b86cae8d12eaeb11f3af91f587a23a6f..a70678219f15ba94b7f9ceb1f81b9aa3881660d4 100644 --- 
a/tests/system-test/7-tmq/tmqAlterSchema.py +++ b/tests/system-test/7-tmq/tmqAlterSchema.py @@ -65,7 +65,6 @@ class TDTestCase: queryStringList = [] tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=4,replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" %(paraDict['dbName'])) tdLog.info("create stb") tdCom.create_stable(tdSql, dbname=paraDict["dbName"],stbname=paraDict["stbName"], column_elm_list=paraDict['colSchema'], tag_elm_list=paraDict['tagSchema']) tdLog.info("create ctb") @@ -176,7 +175,6 @@ class TDTestCase: queryStringList = [] tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=4,replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" %(paraDict['dbName'])) tdLog.info("create stb") tdCom.create_stable(tdSql, dbname=paraDict["dbName"],stbname=paraDict["stbName"], column_elm_list=paraDict['colSchema'], tag_elm_list=paraDict['tagSchema']) tdLog.info("create ntb") diff --git a/tests/system-test/7-tmq/tmqCheckData.py b/tests/system-test/7-tmq/tmqCheckData.py index cb5a40642aab7ba2053780b898fa498e2c8b49a3..4d5edf87f1bb6b88ed9fe3dd8402f94e6c915744 100644 --- a/tests/system-test/7-tmq/tmqCheckData.py +++ b/tests/system-test/7-tmq/tmqCheckData.py @@ -53,7 +53,6 @@ class TDTestCase: tdLog.info("insert data") tmqCom.insert_data(tdSql,paraDict["dbName"],paraDict["ctbPrefix"],paraDict["ctbNum"],paraDict["rowsPerTbl"],paraDict["batchNum"],paraDict["startTs"]) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("create topics from stb with filter") queryString = "select ts, log(c1), ceil(pow(c1,3)) from %s.%s where c1 %% 7 == 0" %(paraDict['dbName'], paraDict['stbName']) sqlString = "create topic %s as %s" %(topicNameList[0], queryString) diff --git a/tests/system-test/7-tmq/tmqCheckData1.py b/tests/system-test/7-tmq/tmqCheckData1.py index 
b4fec94dcc3a2a266a35400dc789ee13aa463caf..1209c2812c536f9b3b6d23c8ad64ac1bbad033bc 100644 --- a/tests/system-test/7-tmq/tmqCheckData1.py +++ b/tests/system-test/7-tmq/tmqCheckData1.py @@ -45,7 +45,6 @@ class TDTestCase: expectRowsList = [] tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=4,replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("create stb") tdCom.create_stable(tdSql, dbname=paraDict["dbName"],stbname=paraDict["stbName"], column_elm_list=paraDict['colSchema'], tag_elm_list=paraDict['tagSchema']) tdLog.info("create ctb") diff --git a/tests/system-test/7-tmq/tmqClientConsLog.py b/tests/system-test/7-tmq/tmqClientConsLog.py index a56bdecb5895481ca5e6460a376e1157747f3798..7f755726cee55a18400e58f9c5b0b58837cdb3bb 100644 --- a/tests/system-test/7-tmq/tmqClientConsLog.py +++ b/tests/system-test/7-tmq/tmqClientConsLog.py @@ -54,7 +54,6 @@ class TDTestCase: tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=self.replicaVar) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("create stb") tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("create ctb") diff --git a/tests/system-test/7-tmq/tmqConsFromTsdb-1ctb-funcNFilter.py b/tests/system-test/7-tmq/tmqConsFromTsdb-1ctb-funcNFilter.py index 3ad1d097e11b85f04f1d0cc9b2cd75a9ea1bdb0a..f372a2b7428a77b02b8dcdb5c4c546756ef7e1f7 100644 --- a/tests/system-test/7-tmq/tmqConsFromTsdb-1ctb-funcNFilter.py +++ b/tests/system-test/7-tmq/tmqConsFromTsdb-1ctb-funcNFilter.py @@ -54,7 +54,6 @@ class TDTestCase: tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("create 
stb") tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("create ctb") diff --git a/tests/system-test/7-tmq/tmqConsFromTsdb-1ctb.py b/tests/system-test/7-tmq/tmqConsFromTsdb-1ctb.py index fdd683d08daba5625017029a3363b379d2a206b1..c7f95f6f41cbf72e8d64c3a9bd8218dfd7a1c6d5 100644 --- a/tests/system-test/7-tmq/tmqConsFromTsdb-1ctb.py +++ b/tests/system-test/7-tmq/tmqConsFromTsdb-1ctb.py @@ -54,7 +54,6 @@ class TDTestCase: tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("create stb") tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("create ctb") diff --git a/tests/system-test/7-tmq/tmqConsFromTsdb-mutilVg-mutilCtb-funcNFilter.py b/tests/system-test/7-tmq/tmqConsFromTsdb-mutilVg-mutilCtb-funcNFilter.py index f05f0abeff3f3f6dabde18ac3d3543d9df032f4e..26dacf514d4f66273d36f6ac3fe49ee17602747a 100644 --- a/tests/system-test/7-tmq/tmqConsFromTsdb-mutilVg-mutilCtb-funcNFilter.py +++ b/tests/system-test/7-tmq/tmqConsFromTsdb-mutilVg-mutilCtb-funcNFilter.py @@ -54,7 +54,6 @@ class TDTestCase: tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("create stb") tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("create ctb") diff --git a/tests/system-test/7-tmq/tmqConsFromTsdb-mutilVg-mutilCtb.py b/tests/system-test/7-tmq/tmqConsFromTsdb-mutilVg-mutilCtb.py index 75b49a34fc583217e5ee0bb81324abc8421ed31a..d6f100041b15c205e8294cca964353d880b9ecc4 100644 --- a/tests/system-test/7-tmq/tmqConsFromTsdb-mutilVg-mutilCtb.py +++ b/tests/system-test/7-tmq/tmqConsFromTsdb-mutilVg-mutilCtb.py @@ -54,7 +54,6 
@@ class TDTestCase: tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("create stb") tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("create ctb") diff --git a/tests/system-test/7-tmq/tmqConsFromTsdb-mutilVg.py b/tests/system-test/7-tmq/tmqConsFromTsdb-mutilVg.py index 26f7a9fb4d1300907807651a5e2fd9dd80579725..11fc7dbcc0587b20fd65bc71047ba04c1adb3f91 100644 --- a/tests/system-test/7-tmq/tmqConsFromTsdb-mutilVg.py +++ b/tests/system-test/7-tmq/tmqConsFromTsdb-mutilVg.py @@ -56,7 +56,6 @@ class TDTestCase: tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("create stb") tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("create ctb") diff --git a/tests/system-test/7-tmq/tmqConsFromTsdb.py b/tests/system-test/7-tmq/tmqConsFromTsdb.py index cca29c178db281b86d145b8623f72a2dd8df1366..8ed4a6df973b57f7302d5a2c193debffbf7286a1 100644 --- a/tests/system-test/7-tmq/tmqConsFromTsdb.py +++ b/tests/system-test/7-tmq/tmqConsFromTsdb.py @@ -54,7 +54,6 @@ class TDTestCase: tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=self.replicaVar) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("create stb") tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("create ctb") diff --git a/tests/system-test/7-tmq/tmqConsFromTsdb1-1ctb-funcNFilter.py b/tests/system-test/7-tmq/tmqConsFromTsdb1-1ctb-funcNFilter.py index 
00d2491c97b0a178fbfd289cf44e2617107fdfe9..6a03f0f75192dda0b8b8394b1733bdec02222bda 100644 --- a/tests/system-test/7-tmq/tmqConsFromTsdb1-1ctb-funcNFilter.py +++ b/tests/system-test/7-tmq/tmqConsFromTsdb1-1ctb-funcNFilter.py @@ -54,7 +54,6 @@ class TDTestCase: tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("create stb") tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("create ctb") diff --git a/tests/system-test/7-tmq/tmqConsFromTsdb1-1ctb.py b/tests/system-test/7-tmq/tmqConsFromTsdb1-1ctb.py index 3b1dbae443f0e8d6f71dc3c8127975790c23401a..c11159c6e551d60b90aab16bb7c90e4b26021ac2 100644 --- a/tests/system-test/7-tmq/tmqConsFromTsdb1-1ctb.py +++ b/tests/system-test/7-tmq/tmqConsFromTsdb1-1ctb.py @@ -54,7 +54,6 @@ class TDTestCase: tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("create stb") tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("create ctb") diff --git a/tests/system-test/7-tmq/tmqConsFromTsdb1-mutilVg-mutilCtb-funcNFilter.py b/tests/system-test/7-tmq/tmqConsFromTsdb1-mutilVg-mutilCtb-funcNFilter.py index a799fa57195fe84065c7bb0b295dc7c0f14cdd09..a717c4966de9d5813545c68099a7a0cd58effa20 100644 --- a/tests/system-test/7-tmq/tmqConsFromTsdb1-mutilVg-mutilCtb-funcNFilter.py +++ b/tests/system-test/7-tmq/tmqConsFromTsdb1-mutilVg-mutilCtb-funcNFilter.py @@ -54,7 +54,6 @@ class TDTestCase: tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) 
tdLog.info("create stb") tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("create ctb") @@ -223,9 +222,9 @@ class TDTestCase: actConsumeTotalRows = resultList[0] - if not (actConsumeTotalRows > 0 and actConsumeTotalRows < totalRowsInserted): + if not (actConsumeTotalRows >= 0 and actConsumeTotalRows <= totalRowsInserted): tdLog.info("act consume rows: %d"%(actConsumeTotalRows)) - tdLog.info("and second consume rows should be between 0 and %d"%(totalRowsInserted)) + tdLog.info("and second consume rows should be between [0 and %d]"%(totalRowsInserted)) tdLog.exit("%d tmq consume rows error!"%consumerId) time.sleep(10) diff --git a/tests/system-test/7-tmq/tmqConsFromTsdb1-mutilVg-mutilCtb.py b/tests/system-test/7-tmq/tmqConsFromTsdb1-mutilVg-mutilCtb.py index f0bedbb187b9987e8f367f1afed7b69fb4c41001..439845aa54a6d29c1cd7633e9949da3eaa40cfee 100644 --- a/tests/system-test/7-tmq/tmqConsFromTsdb1-mutilVg-mutilCtb.py +++ b/tests/system-test/7-tmq/tmqConsFromTsdb1-mutilVg-mutilCtb.py @@ -54,7 +54,6 @@ class TDTestCase: tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("create stb") tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("create ctb") diff --git a/tests/system-test/7-tmq/tmqConsFromTsdb1-mutilVg.py b/tests/system-test/7-tmq/tmqConsFromTsdb1-mutilVg.py index a63927dd8bdee0483cc943ececaac1855248ef57..53ff020b08abf2b758a6de00984b0cb799528d45 100644 --- a/tests/system-test/7-tmq/tmqConsFromTsdb1-mutilVg.py +++ b/tests/system-test/7-tmq/tmqConsFromTsdb1-mutilVg.py @@ -54,7 +54,6 @@ class TDTestCase: tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % 
(paraDict['dbName'])) tdLog.info("create stb") tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("create ctb") diff --git a/tests/system-test/7-tmq/tmqConsFromTsdb1.py b/tests/system-test/7-tmq/tmqConsFromTsdb1.py index 8fcc991d4e3904da04480c7bbc28cac49cf5c068..4bb6cf463f59519565f43e04e18d7d7902b73108 100644 --- a/tests/system-test/7-tmq/tmqConsFromTsdb1.py +++ b/tests/system-test/7-tmq/tmqConsFromTsdb1.py @@ -54,7 +54,6 @@ class TDTestCase: tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=self.replicaVar) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("create stb") tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("create ctb") diff --git a/tests/system-test/7-tmq/tmqConsumerGroup.py b/tests/system-test/7-tmq/tmqConsumerGroup.py index f05f600f27b1d1c3194c85a03edfbcac1aeee3b7..e64d8552340ba9c348ee637e1f676556fd6beabb 100644 --- a/tests/system-test/7-tmq/tmqConsumerGroup.py +++ b/tests/system-test/7-tmq/tmqConsumerGroup.py @@ -45,7 +45,6 @@ class TDTestCase: queryRowsList = [] tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=4,replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("create stb") tdCom.create_stable(tdSql, dbname=paraDict["dbName"],stbname=paraDict["stbName"], column_elm_list=paraDict['colSchema'], tag_elm_list=paraDict['tagSchema']) tdLog.info("create ctb") diff --git a/tests/system-test/7-tmq/tmqDnodeRestart.py b/tests/system-test/7-tmq/tmqDnodeRestart.py index 648d629e5cc2e5cbcc8d60cf1599ba93c37d080a..74aba317264cea673cba4ed459165c20caec1cf3 100644 --- a/tests/system-test/7-tmq/tmqDnodeRestart.py +++ b/tests/system-test/7-tmq/tmqDnodeRestart.py @@ -187,7 +187,6 @@ class TDTestCase: tmqCom.initConsumerTable() # 
tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) - # tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) # tdLog.info("create stb") # tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) # tdLog.info("create ctb") diff --git a/tests/system-test/7-tmq/tmqDropNtb-snapshot0.py b/tests/system-test/7-tmq/tmqDropNtb-snapshot0.py index 7a9c1bbb8c808fe62e8fede76640ac78e19f2f30..c8bcdd6235782f4a29cc18a4f243e8e53ed5ffe5 100644 --- a/tests/system-test/7-tmq/tmqDropNtb-snapshot0.py +++ b/tests/system-test/7-tmq/tmqDropNtb-snapshot0.py @@ -57,7 +57,6 @@ class TDTestCase: tmqCom.initConsumerTable() tdLog.info("start create database....") tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("start create normal tables....") tmqCom.create_ntable(tsql=tdSql, dbname=paraDict["dbName"], tbname_prefix=paraDict["ctbPrefix"], tbname_index_start_num = 1, column_elm_list=paraDict["colSchema"], colPrefix='c', tblNum=paraDict["ctbNum"]) tdLog.info("start insert data into normal tables....") @@ -144,7 +143,6 @@ class TDTestCase: tmqCom.initConsumerTable() tdLog.info("start create database....") tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("start create normal tables....") tmqCom.create_ntable(tsql=tdSql, dbname=paraDict["dbName"], tbname_prefix=paraDict["ctbPrefix"], tbname_index_start_num = 1, column_elm_list=paraDict["colSchema"], colPrefix='c', tblNum=paraDict["ctbNum"]) tdLog.info("start insert data into normal tables....") diff --git a/tests/system-test/7-tmq/tmqDropNtb-snapshot1.py b/tests/system-test/7-tmq/tmqDropNtb-snapshot1.py index 
0b9cb7e66a2d481f6162f1e5e92d8c1f68dcb300..3fc5a2fdc70b64ec95324c370625f5397287ae7d 100644 --- a/tests/system-test/7-tmq/tmqDropNtb-snapshot1.py +++ b/tests/system-test/7-tmq/tmqDropNtb-snapshot1.py @@ -57,7 +57,6 @@ class TDTestCase: tmqCom.initConsumerTable() tdLog.info("start create database....") tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("start create normal tables....") tmqCom.create_ntable(tsql=tdSql, dbname=paraDict["dbName"], tbname_prefix=paraDict["ctbPrefix"], tbname_index_start_num = 1, column_elm_list=paraDict["colSchema"], colPrefix='c', tblNum=paraDict["ctbNum"]) tdLog.info("start insert data into normal tables....") @@ -144,7 +143,6 @@ class TDTestCase: tmqCom.initConsumerTable() tdLog.info("start create database....") tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("start create normal tables....") tmqCom.create_ntable(tsql=tdSql, dbname=paraDict["dbName"], tbname_prefix=paraDict["ctbPrefix"], tbname_index_start_num = 1, column_elm_list=paraDict["colSchema"], colPrefix='c', tblNum=paraDict["ctbNum"]) tdLog.info("start insert data into normal tables....") diff --git a/tests/system-test/7-tmq/tmqDropStb.py b/tests/system-test/7-tmq/tmqDropStb.py index 0b252a733495041464a3504a3bc8d94e54fcf48c..00affabafcb8c729e2f3ca9f988a0f7930eb030b 100644 --- a/tests/system-test/7-tmq/tmqDropStb.py +++ b/tests/system-test/7-tmq/tmqDropStb.py @@ -64,7 +64,6 @@ class TDTestCase: tmqCom.initConsumerTable(self.cdbName) tdCom.create_database(tdSql,self.paraDict["dbName"],self.paraDict["dropFlag"]) - tdSql.execute("alter database %s wal_retention_period 3600" % (self.paraDict['dbName'])) self.paraDict["stbName"] = 'stb1' 
tdCom.create_stable(tdSql,dbname=self.paraDict["dbName"],stbname=self.paraDict["stbName"],column_elm_list=self.paraDict["colSchema"],tag_elm_list=self.paraDict["tagSchema"],count=1, default_stbname_prefix=self.paraDict["stbName"]) diff --git a/tests/system-test/7-tmq/tmqDropStbCtb.py b/tests/system-test/7-tmq/tmqDropStbCtb.py index 587baf12aab02a3ae3e7d4d9783cafab6ac0ceca..c9e34136cc1e59af173205a8e0cb2796acb1a14a 100644 --- a/tests/system-test/7-tmq/tmqDropStbCtb.py +++ b/tests/system-test/7-tmq/tmqDropStbCtb.py @@ -54,7 +54,6 @@ class TDTestCase: # tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("create stb") tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("create ctb") diff --git a/tests/system-test/7-tmq/tmqMaxTopic.py b/tests/system-test/7-tmq/tmqMaxTopic.py index 5dc49fe48f8b3339b2ee31480437ebf19b8c0a22..62bc9ccb4ed7c6f74d05a7ccc2d81db21945ed0b 100644 --- a/tests/system-test/7-tmq/tmqMaxTopic.py +++ b/tests/system-test/7-tmq/tmqMaxTopic.py @@ -71,7 +71,6 @@ class TDTestCase: tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("create stb") tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("create ctb") @@ -216,41 +215,142 @@ class TDTestCase: tdLog.info("create topic sql: %s"%sqlString) tdSql.error(sqlString) - # pThreadList = [] - # for i in range(self.tmqMaxTopicNum): - # topic_name = f"%s%d" %(topicNamePrefix, i) - # print("======%s"%(topic_name)) - # group_id_prefix = f"grp_%d"%(i) - # inputDict = {'group_id_prefix': group_id_prefix, - # 'topic_name': topic_name, - # 'pollDelay': 1 - # } - - # pThread = 
self.asyncSubscribe(inputDict) - # pThreadList.append(pThread) - - # for j in range(self.tmqMaxGroups): - # pThreadList[j].join() - - # time.sleep(5) - # tdSql.query('show subscriptions;') - # subscribeNum = tdSql.queryRows - # expectNum = self.tmqMaxGroups * self.tmqMaxTopicNum - # tdLog.info("loop index: %d, ======subscriptions %d and expect num: %d"%(i, subscribeNum, expectNum)) - # if subscribeNum != expectNum: - # tdLog.exit("subscriptions %d not equal expect num: %d"%(subscribeNum, expectNum)) - - # # drop all topics - # for i in range(self.tmqMaxTopicNum): - # sqlString = "drop topic %s%d" %(topicNamePrefix, i) - # tdLog.info("drop topic sql: %s"%sqlString) - # tdSql.execute(sqlString) + tdLog.info("drop database when there are topic") + sqlString = "drop database %s" %(paraDict['dbName']) + tdLog.info("drop database sql: %s"%sqlString) + tdSql.error(sqlString) + + tdLog.info("drop all topic for re-create") + tdSql.query('show topics;') + topicNum = tdSql.queryRows + tdLog.info(" topic count: %d"%(topicNum)) + for i in range(topicNum): + sqlString = "drop topic %s" %(tdSql.getData(i, 0)) + tdLog.info("drop topic sql: %s"%sqlString) + tdSql.execute(sqlString) + + time.sleep(1) + + tdLog.info("re-create topics") + topicNamePrefix = 'newTopic_' + queryString = "select * from %s.%s"%(paraDict['dbName'], paraDict['stbName']) + for i in range(topicNum): + sqlString = "create topic %s%d as %s" %(topicNamePrefix, i, queryString) + tdLog.info("create topic sql: %s"%sqlString) + tdSql.execute(sqlString) + + #=================================================# + tdLog.info("drop all topic for testcase2") + tdSql.query('show topics;') + topicNum = tdSql.queryRows + tdLog.info(" topic count: %d"%(topicNum)) + for i in range(topicNum): + sqlString = "drop topic %s" %(tdSql.getData(i, 0)) + tdLog.info("drop topic sql: %s"%sqlString) + tdSql.execute(sqlString) - tdLog.printNoPrefix("======== test case 1 end ...... ") + tdLog.printNoPrefix("======== test case 1 end ...... 
") + + + def tmqCase2(self): + tdLog.printNoPrefix("======== test case 2: test topic name len") + paraDict = {'dbName': 'dbt', + 'dropFlag': 1, + 'event': '', + 'vgroups': 1, + 'stbName': 'stb', + 'colPrefix': 'c', + 'tagPrefix': 't', + 'colSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1},{'type': 'TIMESTAMP', 'count':1}], + 'tagSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1}], + 'ctbPrefix': 'ctb', + 'ctbStartIdx': 0, + 'ctbNum': 10, + 'rowsPerTbl': 10, + 'batchNum': 10, + 'startTs': 1640966400000, # 2022-01-01 00:00:00.000 + 'pollDelay': 3, + 'showMsg': 1, + 'showRow': 1, + 'snapshot': 1} + paraDict['vgroups'] = self.vgroups + paraDict['ctbNum'] = self.ctbNum + paraDict['rowsPerTbl'] = self.rowsPerTbl + + queryString = "select * from %s.%s"%(paraDict['dbName'], paraDict['stbName']) + totalTopicNum = 0 + + topicName = 'a' + sqlString = "create topic %s as %s" %(topicName, queryString) + tdLog.info("create topic sql: %s"%sqlString) + tdSql.query(sqlString) + totalTopicNum += 1 + + topicName = '3' + sqlString = "create topic %s as %s" %(topicName, queryString) + tdLog.info("create topic sql: %s"%sqlString) + tdSql.error(sqlString) + totalTopicNum += 0 + + topicName = '_1' + sqlString = "create topic %s as %s" %(topicName, queryString) + tdLog.info("create topic sql: %s"%sqlString) + tdSql.query(sqlString) + totalTopicNum += 1 + + topicName = 'a\\' + sqlString = "create topic %s as %s" %(topicName, queryString) + tdLog.info("create topic sql: %s"%sqlString) + tdSql.error(sqlString) + totalTopicNum += 0 + + topicName = 'a\*\&\^' + sqlString = "create topic %s as %s" %(topicName, queryString) + tdLog.info("create topic sql: %s"%sqlString) + tdSql.error(sqlString) + totalTopicNum += 0 + + str191char = 'a' + for i in 
range(190): + str191char = ('%s%d'%(str191char, 1)) + + topicName = str191char + 'a' + + if (192 != len(topicName)): + tdLog.exit("topicName len error") + + sqlString = "create topic %s as %s" %(topicName, queryString) + tdLog.info("create topic sql: %s"%sqlString) + tdSql.query(sqlString) + totalTopicNum += 1 + + topicName = str191char + '12' + sqlString = "create topic %s as %s" %(topicName, queryString) + tdLog.info("create topic sql: %s"%sqlString) + tdSql.error(sqlString) + totalTopicNum += 0 + + # topicName = str192char + '12' + # sqlString = "create topic %s as %s" %(topicName, queryString) + # tdLog.info("create topic sql: %s"%sqlString) + # tdSql.error(sqlString) + # totalTopicNum += 0 + + # check topic count + tdSql.query('show topics;') + topicNum = tdSql.queryRows + tdLog.info(" topic count: %d"%(topicNum)) + if topicNum != totalTopicNum: + tdLog.exit("show topics %d not equal expect num: %d"%(topicNum, totalTopicNum)) + + + tdLog.printNoPrefix("======== test case 2 end ...... 
") + def run(self): self.prepareTestEnv() self.tmqCase1() + self.tmqCase2() def stop(self): tdSql.close() diff --git a/tests/system-test/7-tmq/tmqOffset.py b/tests/system-test/7-tmq/tmqOffset.py new file mode 100644 index 0000000000000000000000000000000000000000..500c6f53e4efc591c9592db37a990509c23dd7e3 --- /dev/null +++ b/tests/system-test/7-tmq/tmqOffset.py @@ -0,0 +1,399 @@ + +import sys +import re +import time +import threading +from taos.tmq import Consumer +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import * +from util.common import * +sys.path.append("./7-tmq") +from tmqCommon import * + +class TDTestCase: + updatecfgDict = {'debugFlag': 135} + + def __init__(self): + self.vgroups = 2 + self.ctbNum = 1 + self.rowsPerTbl = 10000 + self.tmqMaxTopicNum = 10 + self.tmqMaxGroups = 10 + + self.TSDB_CODE_TMQ_VERSION_OUT_OF_RANGE = '0x4007' + self.TSDB_CODE_TMQ_INVALID_VGID = '0x4008' + self.TSDB_CODE_TMQ_INVALID_TOPIC = '0x4009' + + + + def init(self, conn, logSql, replicaVar=1): + self.replicaVar = int(replicaVar) + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor(), False) + + def getPath(self, tool="taosBenchmark"): + if (platform.system().lower() == 'windows'): + tool = tool + ".exe" + selfPath = os.path.dirname(os.path.realpath(__file__)) + + if ("community" in selfPath): + projPath = selfPath[:selfPath.find("community")] + else: + projPath = selfPath[:selfPath.find("tests")] + + paths = [] + for root, dirs, files in os.walk(projPath): + if ((tool) in files): + rootRealPath = os.path.dirname(os.path.realpath(root)) + if ("packaging" not in rootRealPath): + paths.append(os.path.join(root, tool)) + break + if (len(paths) == 0): + tdLog.exit("taosBenchmark not found!") + return + else: + tdLog.info("taosBenchmark found in %s" % paths[0]) + return paths[0] + + def prepareTestEnv(self): + tdLog.printNoPrefix("======== prepare test env include database, stable, ctables, and insert data: ") + paraDict 
= {'dbName': 'dbt', + 'dropFlag': 1, + 'event': '', + 'vgroups': 1, + 'stbName': 'stb', + 'colPrefix': 'c', + 'tagPrefix': 't', + # 'colSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1},{'type': 'TIMESTAMP', 'count':1}], + # 'tagSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1}], + + 'colSchema': [{'type': 'INT', 'count':1}], + 'tagSchema': [{'type': 'INT', 'count':1}], + + + 'ctbPrefix': 'ctb', + 'ctbStartIdx': 0, + 'ctbNum': 1, + 'rowsPerTbl': 10, + 'batchNum': 100, + 'startTs': 1640966400000, # 2022-01-01 00:00:00.000 + 'pollDelay': 10, + 'showMsg': 1, + 'showRow': 1, + 'snapshot': 1} + + paraDict['vgroups'] = self.vgroups + paraDict['ctbNum'] = self.ctbNum + paraDict['rowsPerTbl'] = self.rowsPerTbl + + # tmqCom.initConsumerTable() + tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1,wal_retention_period=36000) + # tdSql.execute("alter database %s wal_retention_period 360000" % (paraDict['dbName'])) + tdLog.info("create stb") + tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) + tdLog.info("create ctb") + tmqCom.create_ctable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"],ctbPrefix=paraDict['ctbPrefix'], + ctbNum=paraDict["ctbNum"],ctbStartIdx=paraDict['ctbStartIdx']) + tdLog.info("insert data") + tmqCom.insert_data_interlaceByMultiTbl(tsql=tdSql,dbName=paraDict["dbName"],ctbPrefix=paraDict["ctbPrefix"], + ctbNum=paraDict["ctbNum"],rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"], + startTs=paraDict["startTs"],ctbStartIdx=paraDict['ctbStartIdx']) + + tdLog.info("restart taosd to ensure that the data falls into the disk") + # tdDnodes.stop(1) + # tdDnodes.start(1) + tdSql.query("flush database 
%s"%(paraDict['dbName'])) + return + + def tmqPollAllRows(self, consumer): + totalRows = 0 + + res = consumer.poll(10) + while (res): + if not res: + break + err = res.error() + if err is not None: + raise err + + val = res.value() + # print(len(val)) + for block in val: + # print(block.fetchall()) + # print(len(block.fetchall())) + totalRows += len(block.fetchall()) + + res = consumer.poll(10) + + tdLog.info("poll total rows: %d"%(totalRows)) + return totalRows + + def tmqPollRowsByOne(self, consumer): + rows = 0 + res = consumer.poll(3) + if not res: + return rows + err = res.error() + if err is not None: + raise err + val = res.value() + + # print(len(val)) + + for block in val: + # print(block.fetchall()) + # print(len(block.fetchall())) + rows += len(block.fetchall()) + + return rows + + def tmqOffsetTest(self, consumer): + # get topic assignment + tdLog.info("before poll get offset status:") + assignments = consumer.assignment() + for assignment in assignments: + print(assignment) + + # poll + # consumer.poll(5) + rows = self.tmqPollRowsByOne(consumer) + tdLog.info("poll rows: %d"%(rows)) + + # get topic assignment + tdLog.info("after first poll get offset status:") + assignments = consumer.assignment() + for assignment in assignments: + print(assignment) + + + rows = self.tmqPollRowsByOne(consumer) + tdLog.info("poll rows: %d"%(rows)) + + # get topic assignment + tdLog.info("after second poll get offset status:") + assignments = consumer.assignment() + for assignment in assignments: + print(assignment) + + + return + + def tmqSubscribe(self, inputDict): + consumer_dict = { + "group.id": inputDict['group_id'], + "client.id": "client", + "td.connect.user": "root", + "td.connect.pass": "taosdata", + "auto.commit.interval.ms": "1000", + "enable.auto.commit": inputDict['auto_commit'], + "auto.offset.reset": inputDict['offset_reset'], + "experimental.snapshot.enable": "false", + "msg.with.table.name": "false" + } + + consumer = Consumer(consumer_dict) + try: + 
consumer.subscribe([inputDict['topic_name']]) + except Exception as e: + tdLog.info("consumer.subscribe() fail ") + tdLog.info("%s"%(e)) + + # rows = self.tmqPollAllRows(consumer) + tdLog.info("create consumer success!") + return consumer + + def tmqConsumer(self, **inputDict): + consumer = self.tmqSubscribe(inputDict) + self.tmqPollAllRows(consumer) + # consumer.unsubscribe() + # consumer.close() + return + + def asyncSubscribe(self, inputDict): + pThread = threading.Thread(target=self.tmqConsumer, kwargs=inputDict) + pThread.start() + return pThread + + def seekErrorVgid(self, consumer, assignment): + ####################### test1: error vgid + assignmentNew = assignment + # assignment.topic + assignmentNew.partition = assignment.partition + self.vgroups + self.vgroups + # assignment.offset + # consumer.seek(assignment) + + errCodeStr = '' + try: + print("seek parameters:", assignmentNew) + consumer.seek(assignmentNew) + except Exception as e: + tdLog.info("error: %s"%(e)) + + rspString = str(e) + start = "[" + end = "]" + + start_index = rspString.index(start) + len(start) + end_index = rspString.index(end) + + errCodeStr = rspString[start_index:end_index] + # print(errCodeStr) + tdLog.info("error code: %s"%(errCodeStr)) + + if (self.TSDB_CODE_TMQ_INVALID_VGID != errCodeStr): + tdLog.exit("tmq seek should return error code: %s"%(self.TSDB_CODE_TMQ_INVALID_VGID)) + + def seekErrorTopic(self, consumer, assignment): + assignmentNew = assignment + assignmentNew.topic = 'errorToipcName' + # assignment.partition + # assignment.offset + # consumer.seek(assignment) + + errCodeStr = '' + try: + print("seek parameters:", assignmentNew) + consumer.seek(assignmentNew) + except Exception as e: + tdLog.info("error: %s"%(e)) + + rspString = str(e) + start = "[" + end = "]" + + start_index = rspString.index(start) + len(start) + end_index = rspString.index(end) + + errCodeStr = rspString[start_index:end_index] + # print(errCodeStr) + tdLog.info("error code: %s"%(errCodeStr)) + 
+ if (self.TSDB_CODE_TMQ_INVALID_TOPIC != errCodeStr): + tdLog.exit("tmq seek should return error code: %s"%(self.TSDB_CODE_TMQ_INVALID_TOPIC)) + + def seekErrorVersion(self, consumer, assignment): + assignmentNew = assignment + # print(assignment.topic, assignment.partition, assignment.offset) + # assignment.topic + # assignment.partition + assignmentNew.offset = assignment.offset + self.rowsPerTbl * 100000 + # consumer.seek(assignment) + + errCodeStr = '' + try: + # print(assignmentNew.topic, assignmentNew.partition, assignmentNew.offset) + print("seek parameters:", assignmentNew) + consumer.seek(assignmentNew) + except Exception as e: + tdLog.info("error: %s"%(e)) + + rspString = str(e) + start = "[" + end = "]" + + start_index = rspString.index(start) + len(start) + end_index = rspString.index(end) + + errCodeStr = rspString[start_index:end_index] + # print(errCodeStr) + tdLog.info("error code: %s"%(errCodeStr)) + + if (self.TSDB_CODE_TMQ_VERSION_OUT_OF_RANGE != errCodeStr): + tdLog.exit("tmq seek should return error code: %s"%(self.TSDB_CODE_TMQ_VERSION_OUT_OF_RANGE)) + + def tmqCase1(self): + tdLog.printNoPrefix("======== test case 1: ") + paraDict = {'dbName': 'dbt', + 'dropFlag': 1, + 'event': '', + 'vgroups': 1, + 'stbName': 'stb', + 'colPrefix': 'c', + 'tagPrefix': 't', + 'colSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1},{'type': 'TIMESTAMP', 'count':1}], + 'tagSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1}], + 'ctbPrefix': 'ctb', + 'ctbStartIdx': 0, + 'ctbNum': 1, + 'rowsPerTbl': 100000000, + 'batchNum': 10, + 'startTs': 1640966400000, # 2022-01-01 00:00:00.000 + 'pollDelay': 3, + 'showMsg': 1, + 'showRow': 1, + 'snapshot': 1} + + paraDict['vgroups'] = self.vgroups + paraDict['ctbNum'] = self.ctbNum 
+ paraDict['rowsPerTbl'] = self.rowsPerTbl + + # ntbName = 'ntb' + # sqlString = "create table %s.%s (ts timestamp, c int)"%(paraDict['dbName'], ntbName) + # tdLog.info("create ntb sql: %s"%sqlString) + # tdSql.execute(sqlString) + + topicName = 'offset_tp' + # queryString = "select * from %s.%s"%(paraDict['dbName'], ntbName) + queryString = "select * from %s.%s"%(paraDict['dbName'], paraDict['stbName']) + sqlString = "create topic %s as %s" %(topicName, queryString) + tdLog.info("create topic sql: %s"%sqlString) + tdSql.execute(sqlString) + + inputDict = { + "topic_name": topicName, + "group_id": "offsetGrp", + "auto_commit": "true", + "offset_reset": "earliest" + } + + pThread = self.asyncSubscribe(inputDict) + # pThread.join() + + consumer = self.tmqSubscribe(inputDict) + # get topic assignment + assignments = consumer.assignment() + # print(type(assignments)) + for assignment in assignments: + print(assignment) + + assignment = assignments[0] + topic = assignment.topic + partition = assignment.partition + offset = assignment.offset + + tdLog.info("======== test error vgid =======") + print("current assignment: ", assignment) + self.seekErrorVgid(consumer, assignment) + + tdLog.info("======== test error topic =======") + assignment.topic = topic + assignment.partition = partition + assignment.offset = offset + print("current assignment: ", assignment) + self.seekErrorTopic(consumer, assignment) + + tdLog.info("======== test error version =======") + assignment.topic = topic + assignment.partition = partition + assignment.offset = offset + print("current assignment: ", assignment) + self.seekErrorVersion(consumer, assignment) + + pThread.join() + + tdLog.printNoPrefix("======== test case 1 end ...... 
") + + def run(self): + self.prepareTestEnv() + self.tmqCase1() + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +event = threading.Event() + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/system-test/7-tmq/tmqParamsTest.py b/tests/system-test/7-tmq/tmqParamsTest.py index d85b700a1d3fa3d452f3516f3b3ccdf7e9c5b193..0fc7a6cdd98613e684776f6abc3b9077149b6254 100644 --- a/tests/system-test/7-tmq/tmqParamsTest.py +++ b/tests/system-test/7-tmq/tmqParamsTest.py @@ -26,7 +26,7 @@ class TDTestCase: # self.commit_value_list = ["true"] # self.offset_value_list = [""] # self.tbname_value_list = ["true"] - # self.snapshot_value_list = ["true"] + # self.snapshot_value_list = ["false"] def tmqParamsTest(self): paraDict = {'dbName': 'db1', @@ -131,8 +131,8 @@ class TDTestCase: if snapshot_value == "true": if offset_value != "earliest" and offset_value != "": if offset_value == "latest": - offset_value_list = list(map(lambda x: int(x[-2].replace("wal:", "").replace("earliest", "0")), subscription_info)) - tdSql.checkEqual(sum(offset_value_list) > 0, True) + offset_value_list = list(map(lambda x: int(x[-2].replace("wal:", "").replace(offset_value, "0")), subscription_info)) + tdSql.checkEqual(sum(offset_value_list) >= 0, True) rows_value_list = list(map(lambda x: int(x[-1]), subscription_info)) tdSql.checkEqual(sum(rows_value_list), expected_res) elif offset_value == "none": @@ -154,8 +154,8 @@ class TDTestCase: tdSql.checkEqual(rows_value_list, [None]*len(subscription_info)) else: if offset_value != "none": - offset_value_list = list(map(lambda x: int(x[-2].replace("wal:", "").replace("earliest", "0")), subscription_info)) - tdSql.checkEqual(sum(offset_value_list) > 0, True) + offset_value_list = list(map(lambda x: int(x[-2].replace("wal:", "").replace(offset_value, "0")), subscription_info)) + tdSql.checkEqual(sum(offset_value_list) >= 0, True) rows_value_list = list(map(lambda x: 
int(x[-1]), subscription_info)) tdSql.checkEqual(sum(rows_value_list), expected_res) else: diff --git a/tests/system-test/7-tmq/tmqShow.py b/tests/system-test/7-tmq/tmqShow.py index e9234f6c7aea99cf339be76ed799b34d3ec44721..31ddc1b0f831df5d6e10f4337536488ddeaa892e 100644 --- a/tests/system-test/7-tmq/tmqShow.py +++ b/tests/system-test/7-tmq/tmqShow.py @@ -51,7 +51,6 @@ class TDTestCase: consumerIdList = [0, 1, 2, 3] tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict['vgroups'],replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("create stb") tdCom.create_stable(tdSql, dbname=paraDict["dbName"],stbname=paraDict["stbName"], column_elm_list=paraDict['colSchema'], tag_elm_list=paraDict['tagSchema']) tdLog.info("create ctb") diff --git a/tests/system-test/7-tmq/tmqSubscribeStb-r3.py b/tests/system-test/7-tmq/tmqSubscribeStb-r3.py index 7f322dc2583201bce47f40a9c1883daabadf8e42..85222a941bf466631feedd1ead0387ef5f984096 100644 --- a/tests/system-test/7-tmq/tmqSubscribeStb-r3.py +++ b/tests/system-test/7-tmq/tmqSubscribeStb-r3.py @@ -94,7 +94,6 @@ class TDTestCase: tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=self.replica) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("create stb") tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("create ctb") diff --git a/tests/system-test/7-tmq/tmqUdf-multCtb-snapshot0.py b/tests/system-test/7-tmq/tmqUdf-multCtb-snapshot0.py index bee38ca8ee192c542ebc2c09718d4e4224904c1e..5f9fb42c453ed49d16461ec7ec597bff69c3ed46 100644 --- a/tests/system-test/7-tmq/tmqUdf-multCtb-snapshot0.py +++ b/tests/system-test/7-tmq/tmqUdf-multCtb-snapshot0.py @@ -88,7 +88,6 @@ class TDTestCase: tmqCom.initConsumerTable() tdCom.create_database(tdSql, 
paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("create stb") tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("create ctb") @@ -136,7 +135,6 @@ class TDTestCase: expectRowsList = [] tmqCom.initConsumerTable() # tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=4,replica=1) - # tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) # tdLog.info("create stb") # tdCom.create_stable(tdSql, dbname=paraDict["dbName"],stbname=paraDict["stbName"], column_elm_list=paraDict['colSchema'], tag_elm_list=paraDict['tagSchema']) # tdLog.info("create ctb") @@ -239,7 +237,6 @@ class TDTestCase: expectRowsList = [] tmqCom.initConsumerTable() # tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=4,replica=1) - # tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) # tdLog.info("create stb") # tdCom.create_stable(tdSql, dbname=paraDict["dbName"],stbname=paraDict["stbName"], column_elm_list=paraDict['colSchema'], tag_elm_list=paraDict['tagSchema']) # tdLog.info("create ctb") diff --git a/tests/system-test/7-tmq/tmqUdf-multCtb-snapshot1.py b/tests/system-test/7-tmq/tmqUdf-multCtb-snapshot1.py index d3b64d2b2150521c1a3bc98fe41734390609084f..6278527c64061806013e64367c65bc685d76386b 100644 --- a/tests/system-test/7-tmq/tmqUdf-multCtb-snapshot1.py +++ b/tests/system-test/7-tmq/tmqUdf-multCtb-snapshot1.py @@ -88,7 +88,6 @@ class TDTestCase: tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("create stb") tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("create ctb") @@ -136,7 +135,6 @@ 
class TDTestCase: expectRowsList = [] tmqCom.initConsumerTable() # tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=4,replica=1) - # tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) # tdLog.info("create stb") # tdCom.create_stable(tdSql, dbname=paraDict["dbName"],stbname=paraDict["stbName"], column_elm_list=paraDict['colSchema'], tag_elm_list=paraDict['tagSchema']) # tdLog.info("create ctb") @@ -239,7 +237,6 @@ class TDTestCase: expectRowsList = [] tmqCom.initConsumerTable() # tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=4,replica=1) - # tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) # tdLog.info("create stb") # tdCom.create_stable(tdSql, dbname=paraDict["dbName"],stbname=paraDict["stbName"], column_elm_list=paraDict['colSchema'], tag_elm_list=paraDict['tagSchema']) # tdLog.info("create ctb") diff --git a/tests/system-test/7-tmq/tmqUdf.py b/tests/system-test/7-tmq/tmqUdf.py index 5da1625cb125091dddc6ddbe3e635dc404352bbb..8af4406f780916087cbf5c6a8fffe549d90b42af 100644 --- a/tests/system-test/7-tmq/tmqUdf.py +++ b/tests/system-test/7-tmq/tmqUdf.py @@ -88,7 +88,6 @@ class TDTestCase: tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("create stb") tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("create ctb") @@ -136,7 +135,6 @@ class TDTestCase: expectRowsList = [] tmqCom.initConsumerTable() # tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=4,replica=1) - # tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) # tdLog.info("create stb") # tdCom.create_stable(tdSql, dbname=paraDict["dbName"],stbname=paraDict["stbName"], 
column_elm_list=paraDict['colSchema'], tag_elm_list=paraDict['tagSchema']) # tdLog.info("create ctb") @@ -240,7 +238,6 @@ class TDTestCase: expectRowsList = [] tmqCom.initConsumerTable() # tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=4,replica=1) - # tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) # tdLog.info("create stb") # tdCom.create_stable(tdSql, dbname=paraDict["dbName"],stbname=paraDict["stbName"], column_elm_list=paraDict['colSchema'], tag_elm_list=paraDict['tagSchema']) # tdLog.info("create ctb") diff --git a/tests/system-test/7-tmq/tmqUpdate-1ctb.py b/tests/system-test/7-tmq/tmqUpdate-1ctb.py index 8fdf7748a3425b1efe7966949a1098e5867b6ee2..920e8e77e466eaf27c99fd9ac462bbd27a18ed94 100644 --- a/tests/system-test/7-tmq/tmqUpdate-1ctb.py +++ b/tests/system-test/7-tmq/tmqUpdate-1ctb.py @@ -54,7 +54,6 @@ class TDTestCase: tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("create stb") tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("create ctb") diff --git a/tests/system-test/7-tmq/tmqUpdate-multiCtb-snapshot0.py b/tests/system-test/7-tmq/tmqUpdate-multiCtb-snapshot0.py index 8b67f6f8252e5e3682284c5c2d5b4295308db26c..2f1d3e263175e2adedc6fad6f4693a0fcda0d4bb 100644 --- a/tests/system-test/7-tmq/tmqUpdate-multiCtb-snapshot0.py +++ b/tests/system-test/7-tmq/tmqUpdate-multiCtb-snapshot0.py @@ -55,7 +55,6 @@ class TDTestCase: tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("create stb") tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("create 
ctb") diff --git a/tests/system-test/7-tmq/tmqUpdate-multiCtb-snapshot1.py b/tests/system-test/7-tmq/tmqUpdate-multiCtb-snapshot1.py index 5a35c4f5ee84c950ee7a611e5be93fe13371b031..6b8c10de271b28726639baad8648e2586c02f9a1 100644 --- a/tests/system-test/7-tmq/tmqUpdate-multiCtb-snapshot1.py +++ b/tests/system-test/7-tmq/tmqUpdate-multiCtb-snapshot1.py @@ -55,7 +55,6 @@ class TDTestCase: tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("create stb") tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("create ctb") diff --git a/tests/system-test/7-tmq/tmqUpdate-multiCtb.py b/tests/system-test/7-tmq/tmqUpdate-multiCtb.py index 84617efae4d78cf57cf6d522a7b3fbf50321e5f2..3975013e747f09d8cc0548b5e32f9f01c63fb9a4 100644 --- a/tests/system-test/7-tmq/tmqUpdate-multiCtb.py +++ b/tests/system-test/7-tmq/tmqUpdate-multiCtb.py @@ -55,7 +55,6 @@ class TDTestCase: tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("create stb") tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("create ctb") diff --git a/tests/system-test/99-TDcase/TD-16821.py b/tests/system-test/99-TDcase/TD-16821.py index 2e23002059c5d157b3e1f6edf2f21a5d291739bc..26b41e6afc9a7e90b8993ef9ca1b8cf33eb27e05 100644 --- a/tests/system-test/99-TDcase/TD-16821.py +++ b/tests/system-test/99-TDcase/TD-16821.py @@ -45,7 +45,6 @@ class TDTestCase: expectRowsList = [] tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=4,replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) 
tdLog.info("create stb") tdCom.create_stable(tdSql, dbname=paraDict["dbName"],stbname=paraDict["stbName"], column_elm_list=paraDict['colSchema'], tag_elm_list=paraDict['tagSchema']) tdLog.info("create ctb") diff --git a/tests/system-test/99-TDcase/TD-17255.py b/tests/system-test/99-TDcase/TD-17255.py index 5f68a5b7389d3ea095a8886e2801dd82fb6af8d9..0f83468754783ee5adca78113c7a8df5c78bbe99 100644 --- a/tests/system-test/99-TDcase/TD-17255.py +++ b/tests/system-test/99-TDcase/TD-17255.py @@ -53,7 +53,6 @@ class TDTestCase: tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("create stb") tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("create ctb") @@ -98,7 +97,6 @@ class TDTestCase: tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("create stb") tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("create ctb") @@ -183,7 +181,6 @@ class TDTestCase: tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("create stb") tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("create ctb") @@ -270,7 +267,6 @@ class TDTestCase: tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], vgroups=paraDict["vgroups"],replica=1) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) tdLog.info("create stb") tmqCom.create_stable(tdSql, 
dbName=paraDict["dbName"],stbName=paraDict["stbName"]) tdLog.info("insert data by auto create ctb") diff --git a/tests/system-test/99-TDcase/TD-17699.py b/tests/system-test/99-TDcase/TD-17699.py index 6956e88aec7d0c8adb84c8af9aff31cb6d80fa1f..2862f4a78d5391098f0493afa35e482c5a032817 100644 --- a/tests/system-test/99-TDcase/TD-17699.py +++ b/tests/system-test/99-TDcase/TD-17699.py @@ -65,7 +65,6 @@ class TDTestCase: tmqCom.initConsumerTable(self.cdbName) tdCom.create_database(tdSql,self.paraDict["dbName"],self.paraDict["dropFlag"]) - tdSql.execute("alter database %s wal_retention_period 3600" % (paraDict['dbName'])) self.paraDict["stbName"] = 'stb1' tdCom.create_stable(tdSql,dbname=self.paraDict["dbName"],stbname=self.paraDict["stbName"],column_elm_list=self.paraDict["colSchema"],tag_elm_list=self.paraDict["tagSchema"],count=1, default_stbname_prefix=self.paraDict["stbName"]) diff --git a/tests/system-test/eco-system/manager/cmul.py b/tests/system-test/eco-system/manager/cmul.py new file mode 100644 index 0000000000000000000000000000000000000000..ac2fa5e4f28743513e7c78c3a9c3ecbc5c7592e9 --- /dev/null +++ b/tests/system-test/eco-system/manager/cmul.py @@ -0,0 +1,104 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. 
+# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +# +# The option for wal_retetion_period and wal_retention_size is work well +# + +import taos +from taos.tmq import Consumer + +import os +import sys +import threading +import json +import time +import random +from datetime import date +from datetime import datetime +from datetime import timedelta +from os import path + + +topicName = "topic" +topicNum = 100 + +# consume topic +def consume_topic(topic_name, group,consume_cnt, index, wait): + consumer = Consumer( + { + "group.id": group, + "td.connect.user": "root", + "td.connect.pass": "taosdata", + "enable.auto.commit": "true", + } + ) + + print(f"start consumer topic:{topic_name} group={group} index={index} ...") + consumer.subscribe([topic_name]) + cnt = 0 + try: + while True and cnt < consume_cnt: + res = consumer.poll(1) + if not res: + if wait: + continue + else: + break + err = res.error() + if err is not None: + raise err + val = res.value() + cnt += 1 + print(f" consume {cnt} ") + for block in val: + datas = block.fetchall() + data = datas[0][:50] + + print(f" {topic_name}_{group}_{index} {cnt} {data}") + + finally: + consumer.unsubscribe() + consumer.close() + +def consumerThread(index): + global topicName, topicNum + print(f' thread {index} start...') + while True: + idx = random.randint(0, topicNum - 1) + name = f"{topicName}{idx}" + group = f"group_{index}_{idx}" + consume_topic(name, group, 100, index, True) + + + +if __name__ == "__main__": + print(sys.argv) + threadCnt = 10 + + if len(sys.argv) == 1: + threadCnt = int(sys.argv[1]) + + + threads = [] + print(f'consumer with {threadCnt} threads...') + for i in range(threadCnt): + x = threading.Thread(target=consumerThread, args=(i,)) + x.start() + 
threads.append(x) + + # wait + for i, thread in enumerate(threads): + thread.join() + print(f'join thread {i} end.') + diff --git a/tests/system-test/eco-system/manager/mul.py b/tests/system-test/eco-system/manager/mul.py new file mode 100644 index 0000000000000000000000000000000000000000..d78b63d386209d06a78ff9f77aa8552d08f1a181 --- /dev/null +++ b/tests/system-test/eco-system/manager/mul.py @@ -0,0 +1,114 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. +# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import os +import sys +import random +import time + +from util.log import * +from util.cases import * +from util.sql import * +from util.common import * +from util.sqlset import * + +class TDTestCase: + def init(self, conn, logSql, replicaVar=1): + self.replicaVar = int(replicaVar) + tdLog.debug("start to execute %s" % __file__) + tdSql.init(conn.cursor()) + self.setsql = TDSetSql() + + # prepareEnv + def prepareEnv(self): + self.dbName = "mullevel" + self.stbName = "meters" + self.topicName = "topic" + self.topicNum = 100 + self.loop = 50000 + + sql = f"use {self.dbName}" + tdSql.execute(sql) + + # generate topic sql + self.sqls = [ + f"select * from {self.stbName}", + f"select * from {self.stbName} where ui < 200", + f"select * from {self.stbName} where fc > 20.1", + f"select * from {self.stbName} where nch like '%%a%%'", + f"select * from {self.stbName} where fc > 20.1", + f"select lower(bin) from {self.stbName} where length(bin) < 10;", + f"select upper(bin) from {self.stbName} where length(nch) > 10;", + f"select upper(bin) from {self.stbName} where ti > 10 
or ic < 40;", + f"select * from {self.stbName} where ic < 100 " + ] + + + + # prepareEnv + def createTopics(self): + for i in range(self.topicNum): + topicName = f"{self.topicName}{i}" + sql = random.choice(self.sqls) + createSql = f"create topic if not exists {topicName} as {sql}" + try: + tdSql.execute(createSql, 3, True) + except: + tdLog.info(f" create topic {topicName} failed.") + + + # random del topic + def managerTopics(self): + + for i in range(self.loop): + tdLog.info(f"start modify loop={i}") + idx = random.randint(0, self.topicNum - 1) + # delete + topicName = f"{self.topicName}{idx}" + sql = f"drop topic if exist {topicName}" + try: + tdSql.execute(sql, 3, True) + except: + tdLog.info(f" drop topic {topicName} failed.") + + + # create topic + sql = random.choice(self.sqls) + createSql = f"create topic if not exists {topicName} as {sql}" + try: + tdSql.execute(createSql, 3, True) + except: + tdLog.info(f" create topic {topicName} failed.") + + seconds = [0.1, 0.5, 3, 2.5, 1.5, 0.4, 5.2, 2.6, 0.4, 0.2] + time.sleep(random.choice(seconds)) + + + # run + def run(self): + # prepare env + self.prepareEnv() + + # create topic + self.createTopics() + + # modify topic + self.managerTopics() + + + def stop(self): + tdSql.close() + tdLog.success("%s successfully executed" % __file__) + +tdCases.addWindows(__file__, TDTestCase()) +tdCases.addLinux(__file__, TDTestCase()) diff --git a/tests/system-test/empty.py b/tests/system-test/empty.py new file mode 100644 index 0000000000000000000000000000000000000000..fc44d0164f8059344ce09ea9cc31cdfce140d19f --- /dev/null +++ b/tests/system-test/empty.py @@ -0,0 +1,40 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. 
+# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import sys +import time + +import taos +from util.log import * +from util.cases import * +from util.sql import * + +class TDTestCase: + # init + def init(self, conn, logSql, replicaVar=1): + self.replicaVar = int(replicaVar) + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor(), True) + + # run + def run(self): + # check two db query result same + tdLog.info(f"hello world.") + + # stop + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) \ No newline at end of file diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index e8826584080065245fb5604dfbd07b4e538ca599..ea8b5e61693003cb8d850d5a9f8233a6c41bf6a3 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -3,8 +3,6 @@ IF (TD_WEBSOCKET) SET(websocket_lib_file "libtaosws.so") ELSEIF (TD_DARWIN) SET(websocket_lib_file "libtaosws.dylib") - ELSEIF (TD_WINDOWS) - SET(websocket_lib_file "{taosws.dll,taosws.dll.lib}") ENDIF () MESSAGE("${Green} use libtaos-ws${ColourReset}") IF (TD_ALPINE) @@ -26,6 +24,26 @@ IF (TD_WEBSOCKET) COMMAND cmake -E make_directory ${CMAKE_BINARY_DIR}/build/include COMMAND cmake -E copy target/release/taosws.h ${CMAKE_BINARY_DIR}/build/include ) + ELSEIF (TD_WINDOWS) + include(ExternalProject) + ExternalProject_Add(taosws-rs + PREFIX "taosws-rs" + SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/taosws-rs + BUILD_ALWAYS off + DEPENDS taos + BUILD_IN_SOURCE 1 + CONFIGURE_COMMAND cmake -E echo "taosws-rs no need cmake to config" + PATCH_COMMAND + COMMAND git clean -f -d + BUILD_COMMAND + COMMAND cargo update + COMMAND cargo build --release -p taos-ws-sys --features 
native-tls-vendored + INSTALL_COMMAND + COMMAND cp target/release/taosws.dll ${CMAKE_BINARY_DIR}/build/lib + COMMAND cp target/release/taosws.dll.lib ${CMAKE_BINARY_DIR}/build/lib/taosws.lib + COMMAND cmake -E make_directory ${CMAKE_BINARY_DIR}/build/include + COMMAND cmake -E copy target/release/taosws.h ${CMAKE_BINARY_DIR}/build/include + ) ELSE() include(ExternalProject) ExternalProject_Add(taosws-rs diff --git a/tools/shell/CMakeLists.txt b/tools/shell/CMakeLists.txt index acc47d49100934083fa9a4ce6ce21bd580b3b144..0ce181808fb8e98dc047f3bf34747d09217ea768 100644 --- a/tools/shell/CMakeLists.txt +++ b/tools/shell/CMakeLists.txt @@ -20,7 +20,7 @@ ELSEIF (TD_DARWIN AND TD_WEBSOCKET) ADD_DEPENDENCIES(shell taosws-rs) ELSEIF (TD_WINDOWS AND TD_WEBSOCKET) ADD_DEFINITIONS(-DWEBSOCKET -I${CMAKE_BINARY_DIR}/build/include) - SET(LINK_WEBSOCKET "${CMAKE_BINARY_DIR}/build/lib/taosws.dll.lib") + SET(LINK_WEBSOCKET "${CMAKE_BINARY_DIR}/build/lib/taosws.lib") ADD_DEPENDENCIES(shell taosws-rs) ELSE () SET(LINK_WEBSOCKET "") diff --git a/tools/shell/src/shellCommand.c b/tools/shell/src/shellCommand.c index 0e305f57e9c781a8149db1fecccd06a703bc1465..8c91ff53e2770fec8118ab9eee7e0121720a2c42 100644 --- a/tools/shell/src/shellCommand.c +++ b/tools/shell/src/shellCommand.c @@ -501,7 +501,7 @@ int32_t shellReadCommand(char *command) { while (1) { c = taosGetConsoleChar(); - if (c == EOF) { + if (c == (char)EOF) { return c; } diff --git a/tools/shell/src/shellUtil.c b/tools/shell/src/shellUtil.c index e15b49efcc35da2682d003243c0a19eb278acbc7..93451c85a9a34545a6aa86e3777d92d462f8849d 100644 --- a/tools/shell/src/shellUtil.c +++ b/tools/shell/src/shellUtil.c @@ -68,7 +68,7 @@ int32_t shellCheckIntSize() { return 0; } -void shellPrintVersion() { printf("version: %s\r\n", version); } +void shellPrintVersion() { printf("%s\r\n", shell.info.programVersion); } void shellGenerateAuth() { char secretEncrypt[TSDB_PASSWORD_LEN + 1] = {0}; diff --git a/utils/test/c/sml_test.c 
b/utils/test/c/sml_test.c index dad30db02a52db6bcc5d4992039b61b8ec939125..e4ed6037a34d207ae32d4f0d62525c343413a82b 100644 --- a/utils/test/c/sml_test.c +++ b/utils/test/c/sml_test.c @@ -1522,6 +1522,36 @@ int sml_ts2385_Test() { return code; } +int sml_ts3724_Test() { + TAOS *taos = taos_connect("localhost", "root", "taosdata", NULL, 0); + + TAOS_RES *pRes = taos_query(taos, "drop database if exists ts3724"); + taos_free_result(pRes); + + pRes = taos_query(taos, "create database if not exists ts3724"); + taos_free_result(pRes); + + const char *sql[] = { + "stb.2,t1=1 f1=283i32 1632299372000", + ".stb2,t1=1 f1=106i32 1632299378000", + "stb2.,t1=1 f1=106i32 1632299378000", + }; + + pRes = taos_query(taos, "use ts3724"); + taos_free_result(pRes); + + pRes = taos_schemaless_insert(taos, (char **)sql, sizeof(sql) / sizeof(sql[0]), TSDB_SML_LINE_PROTOCOL, + TSDB_SML_TIMESTAMP_MILLI_SECONDS); + + int code = taos_errno(pRes); + printf("%s result0:%s\n", __FUNCTION__, taos_errstr(pRes)); + taos_free_result(pRes); + + taos_close(taos); + + return code; +} + int main(int argc, char *argv[]) { if (argc == 2) { taos_options(TSDB_OPTION_CONFIGDIR, argv[1]); @@ -1579,5 +1609,8 @@ int main(int argc, char *argv[]) { ASSERT(!ret); ret = sml_19221_Test(); ASSERT(!ret); + ret = sml_ts3724_Test(); + ASSERT(!ret); + return ret; } diff --git a/utils/tsim/CMakeLists.txt b/utils/tsim/CMakeLists.txt index 81737809d900a8931be71b4b7c605c9f18627d32..c2cf7ac3c5380c4e116e96753fa78f486d918566 100644 --- a/utils/tsim/CMakeLists.txt +++ b/utils/tsim/CMakeLists.txt @@ -2,7 +2,7 @@ aux_source_directory(src TSIM_SRC) add_executable(tsim ${TSIM_SRC}) target_link_libraries( tsim - PUBLIC taos + PUBLIC taos_static PUBLIC util PUBLIC common PUBLIC os