Commit f2a36718 authored by: H hzcheng

Merge branch '2.0' into feature/2.0tsdb

CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
PROJECT(TDengine)
SET(CMAKE_C_STANDARD 11)
SET(CMAKE_VERBOSE_MAKEFILE ON)
SET(TD_CLUSTER FALSE)
SET(TD_ACCOUNT FALSE)
SET(TD_GRANT FALSE)
SET(TD_COVER FALSE)
SET(TD_PAGMODE_LITE FALSE)
#
# If need to set debug options
# 1.Generate debug version:
# mkdir debug; cd debug;
# cmake -DCMAKE_BUILD_TYPE=Debug ..
# 2.Generate release version:
# mkdir release; cd release;
# cmake -DCMAKE_BUILD_TYPE=Release ..
#
#
# If it is a Windows operating system
# 1.Use command line tool of VS2013 or higher version
# mkdir build; cd build;
# cmake -G "NMake Makefiles" ..
# nmake install
# 2.Use the VS development interface tool
# mkdir build; cd build;
# cmake -A x64 ..
# open the file named TDengine.sln
#
SET(TD_GODLL FALSE)
SET(TD_MEM_CHECK FALSE)
IF (${DLLTYPE} MATCHES "go")
ADD_DEFINITIONS(-D_TD_GO_DLL_)
MESSAGE(STATUS "input dll type: " ${DLLTYPE})
SET(TD_GODLL TRUE)
ENDIF ()
IF (NOT DEFINED TD_CLUSTER)
MESSAGE(STATUS "Build the Lite Version")
SET(TD_CLUSTER FALSE)
SET(TD_EDGE TRUE)
SET(TD_COMMUNITY_DIR ${PROJECT_SOURCE_DIR})
MESSAGE(STATUS "Community directory: " ${TD_COMMUNITY_DIR})
# Set macro definitions according to os platform
SET(TD_LINUX_64 FALSE)
SET(TD_LINUX_32 FALSE)
SET(TD_ARM FALSE)
SET(TD_ARM_64 FALSE)
SET(TD_ARM_32 FALSE)
SET(TD_MIPS FALSE)
SET(TD_MIPS_64 FALSE)
SET(TD_MIPS_32 FALSE)
SET(TD_DARWIN_64 FALSE)
SET(TD_WINDOWS_64 FALSE)
SET(TD_PAGMODE_LITE FALSE)
IF (${PAGMODE} MATCHES "lite")
SET(TD_PAGMODE_LITE TRUE)
ENDIF ()
# if generate ARM version:
# cmake -DCPUTYPE=aarch32 .. or cmake -DCPUTYPE=aarch64
IF (${CPUTYPE} MATCHES "aarch32")
SET(TD_ARM TRUE)
SET(TD_ARM_32 TRUE)
SET(TD_PAGMODE_LITE TRUE)
ADD_DEFINITIONS(-D_TD_ARM_)
ADD_DEFINITIONS(-D_TD_ARM_32_)
ELSEIF (${CPUTYPE} MATCHES "aarch64")
SET(TD_ARM TRUE)
SET(TD_ARM_64 TRUE)
ADD_DEFINITIONS(-D_TD_ARM_)
ADD_DEFINITIONS(-D_TD_ARM_64_)
ELSEIF (${CPUTYPE} MATCHES "mips64")
SET(TD_MIPS TRUE)
SET(TD_MIPS_64 TRUE)
ADD_DEFINITIONS(-D_TD_MIPS_)
ADD_DEFINITIONS(-D_TD_MIPS_64_)
ELSEIF (${CPUTYPE} MATCHES "x64")
MESSAGE(STATUS "input cpuType: " ${CPUTYPE})
ELSEIF (${CPUTYPE} MATCHES "x86")
MESSAGE(STATUS "input cpuType: " ${CPUTYPE})
ELSE ()
MESSAGE(STATUS "input cpuType: " ${CPUTYPE})
ENDIF ()
#
# Get OS information and store in variable TD_OS_INFO.
#
execute_process(COMMAND chmod 777 ${TD_COMMUNITY_DIR}/packaging/tools/get_os.sh)
execute_process(COMMAND ${TD_COMMUNITY_DIR}/packaging/tools/get_os.sh "" OUTPUT_VARIABLE TD_OS_INFO)
MESSAGE(STATUS "The current os is " ${TD_OS_INFO})
IF (${CMAKE_SYSTEM_NAME} MATCHES "Linux")
IF (${CMAKE_SIZEOF_VOID_P} MATCHES 8)
SET(TD_LINUX_64 TRUE)
SET(TD_OS_DIR ${TD_COMMUNITY_DIR}/src/os/linux)
ADD_DEFINITIONS(-D_M_X64)
MESSAGE(STATUS "The current platform is Linux 64-bit")
ELSEIF (${CMAKE_SIZEOF_VOID_P} MATCHES 4)
IF (TD_ARM)
SET(TD_LINUX_32 TRUE)
SET(TD_OS_DIR ${TD_COMMUNITY_DIR}/src/os/linux)
#ADD_DEFINITIONS(-D_M_IX86)
MESSAGE(STATUS "The current platform is Linux 32-bit")
ELSE ()
MESSAGE(FATAL_ERROR "The current platform is Linux 32-bit, but no ARM not supported yet")
EXIT ()
ENDIF ()
ELSE ()
MESSAGE(FATAL_ERROR "The current platform is Linux neither 32-bit nor 64-bit, not supported yet")
EXIT ()
ENDIF ()
ELSEIF (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
IF (${CMAKE_SIZEOF_VOID_P} MATCHES 8)
SET(TD_DARWIN_64 TRUE)
SET(TD_OS_DIR ${TD_COMMUNITY_DIR}/src/os/darwin)
MESSAGE(STATUS "The current platform is Darwin 64-bit")
ELSE ()
MESSAGE(FATAL_ERROR "The current platform is Darwin 32-bit, not supported yet")
EXIT ()
ENDIF ()
ELSEIF (${CMAKE_SYSTEM_NAME} MATCHES "Windows")
IF (${CMAKE_SIZEOF_VOID_P} MATCHES 8)
SET(TD_WINDOWS_64 TRUE)
SET(TD_OS_DIR ${TD_COMMUNITY_DIR}/src/os/windows)
ADD_DEFINITIONS(-D_M_X64)
MESSAGE(STATUS "The current platform is Windows 64-bit")
ELSE ()
MESSAGE(FATAL_ERROR "The current platform is Windows 32-bit, not supported yet")
EXIT ()
ENDIF ()
ELSE()
MESSAGE(FATAL_ERROR "The current platform is not Linux/Darwin/Windows, stop compile")
EXIT ()
ENDIF ()
FIND_PROGRAM(TD_MVN_INSTALLED mvn)
IF (TD_MVN_INSTALLED)
MESSAGE(STATUS "MVN is installed and JDBC will be compiled")
ELSE ()
MESSAGE(STATUS "MVN is not installed and JDBC is not compiled")
ENDIF ()
#
# debug flag
#
# ADD_DEFINITIONS(-D_CHECK_HEADER_FILE_)
IF (${MEM_CHECK} MATCHES "true")
ADD_DEFINITIONS(-DTAOS_MEM_CHECK)
ENDIF ()
IF (TD_CLUSTER)
ADD_DEFINITIONS(-DCLUSTER)
ADD_DEFINITIONS(-DTSDB_REPLICA_MAX_NUM=3)
ELSE ()
ADD_DEFINITIONS(-DLITE)
ADD_DEFINITIONS(-DTSDB_REPLICA_MAX_NUM=1)
ENDIF ()
IF (TD_LINUX_64)
SET(DEBUG_FLAGS "-O0 -DDEBUG")
SET(RELEASE_FLAGS "-O0")
IF (NOT TD_ARM)
IF (${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
SET(COMMON_FLAGS "-std=gnu99 -Wall -fPIC -malign-double -g -msse4.2 -D_FILE_OFFSET_BITS=64 -D_LARGE_FILE")
ELSE ()
SET(COMMON_FLAGS "-std=gnu99 -Wall -fPIC -malign-double -g -malign-stringops -msse4.2 -D_FILE_OFFSET_BITS=64 -D_LARGE_FILE")
ENDIF ()
ELSE ()
SET(COMMON_FLAGS "-std=gnu99 -Wall -fPIC -g -fsigned-char -fpack-struct=8 -D_FILE_OFFSET_BITS=64 -D_LARGE_FILE")
ENDIF ()
ADD_DEFINITIONS(-DLINUX)
ADD_DEFINITIONS(-D_REENTRANT -D__USE_POSIX -D_LIBC_REENTRANT)
IF (${TD_OS_INFO} MATCHES "Alpine")
MESSAGE(STATUS "The current OS is Alpine, append extra flags")
SET(COMMON_FLAGS "${COMMON_FLAGS} -largp")
link_libraries(/usr/lib/libargp.a)
ADD_DEFINITIONS(-D_ALPINE)
ENDIF ()
ELSEIF (TD_LINUX_32)
IF (NOT TD_ARM)
EXIT ()
ENDIF ()
SET(DEBUG_FLAGS "-O0 -DDEBUG")
SET(RELEASE_FLAGS "-O0")
SET(COMMON_FLAGS "-std=gnu99 -Wall -fPIC -g -fsigned-char -munaligned-access -fpack-struct=8 -latomic -D_FILE_OFFSET_BITS=64 -D_LARGE_FILE")
ADD_DEFINITIONS(-DLINUX)
ADD_DEFINITIONS(-D_REENTRANT -D__USE_POSIX -D_LIBC_REENTRANT)
ADD_DEFINITIONS(-DUSE_LIBICONV)
IF (${TD_OS_INFO} MATCHES "Alpine")
MESSAGE(STATUS "The current OS is Alpine, add extra flags")
SET(COMMON_FLAGS "${COMMON_FLAGS} -largp")
link_library(/usr/lib/libargp.a)
ADD_DEFINITIONS(-D_ALPINE)
ENDIF ()
ELSEIF (TD_WINDOWS_64)
SET(CMAKE_GENERATOR "NMake Makefiles" CACHE INTERNAL "" FORCE)
IF (NOT TD_GODLL)
SET(COMMON_FLAGS "/nologo /WX- /Oi /Oy- /Gm- /EHsc /MT /GS /Gy /fp:precise /Zc:wchar_t /Zc:forScope /Gd /errorReport:prompt /analyze-")
SET(DEBUG_FLAGS "/Zi /W3 /GL")
SET(RELEASE_FLAGS "/W0 /GL")
ENDIF ()
ADD_DEFINITIONS(-DWINDOWS)
ADD_DEFINITIONS(-D__CLEANUP_C)
ADD_DEFINITIONS(-DPTW32_STATIC_LIB)
ADD_DEFINITIONS(-DPTW32_BUILD)
ADD_DEFINITIONS(-D_MBCS -D_CRT_SECURE_NO_DEPRECATE -D_CRT_NONSTDC_NO_DEPRECATE)
ELSEIF (TD_DARWIN_64)
SET(COMMON_FLAGS "-std=gnu99 -Wall -fPIC -malign-double -g -msse4.2 -D_FILE_OFFSET_BITS=64 -D_LARGE_FILE")
SET(DEBUG_FLAGS "-O0 -DDEBUG")
SET(RELEASE_FLAGS "-O0")
ADD_DEFINITIONS(-DDARWIN)
ADD_DEFINITIONS(-D_REENTRANT -D__USE_POSIX -D_LIBC_REENTRANT)
ELSE ()
MESSAGE(FATAL_ERROR "The current platform is not support yet, stop compile")
EXIT ()
ENDIF ()
# Set compiler options
SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${COMMON_FLAGS} ${DEBUG_FLAGS}")
SET(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${COMMON_FLAGS} ${RELEASE_FLAGS}")
# Set c++ compiler options
# SET(COMMON_CXX_FLAGS "${COMMON_FLAGS} -std=c++11")
# SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${COMMON_CXX_FLAGS} ${DEBUG_FLAGS}")
# SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${COMMON_CXX_FLAGS} ${RELEASE_FLAGS}")
IF (${CMAKE_BUILD_TYPE} MATCHES "Debug")
MESSAGE(STATUS "Build Debug Version")
ELSEIF (${CMAKE_BUILD_TYPE} MATCHES "Release")
MESSAGE(STATUS "Build Release Version")
ELSE ()
IF (TD_WINDOWS_64)
SET(CMAKE_BUILD_TYPE "Release")
MESSAGE(STATUS "Build Release Version in Windows as default")
ELSE ()
SET(CMAKE_BUILD_TYPE "Debug")
MESSAGE(STATUS "Build Debug Version as default")
ENDIF()
ENDIF ()
#set output directory
SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/build/lib)
SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/build/bin)
SET(TD_TESTS_OUTPUT_DIR ${PROJECT_BINARY_DIR}/test)
MESSAGE(STATUS "Operating system dependency directory: " ${TD_OS_DIR}) SET(TD_COMMUNITY_DIR ${PROJECT_SOURCE_DIR})
MESSAGE(STATUS "Project source directory: " ${PROJECT_SOURCE_DIR}) MESSAGE(STATUS "Community directory: " ${TD_COMMUNITY_DIR})
MESSAGE(STATUS "Project binary files output path: " ${PROJECT_BINARY_DIR})
MESSAGE(STATUS "Project executable files output path: " ${EXECUTABLE_OUTPUT_PATH})
MESSAGE(STATUS "Project library files output path: " ${LIBRARY_OUTPUT_PATH})
INCLUDE(cmake/input.inc)
INCLUDE(cmake/platform.inc)
INCLUDE(cmake/env.inc)
INCLUDE(cmake/define.inc)
INCLUDE(cmake/install.inc)
IF (TD_LINUX_64)
SET(TD_MAKE_INSTALL_SH "${TD_COMMUNITY_DIR}/packaging/tools/make_install.sh")
INSTALL(CODE "MESSAGE(\"make install script: ${TD_MAKE_INSTALL_SH}\")")
INSTALL(CODE "execute_process(COMMAND chmod 777 ${TD_MAKE_INSTALL_SH})")
INSTALL(CODE "execute_process(COMMAND ${TD_MAKE_INSTALL_SH} ${TD_COMMUNITY_DIR} ${PROJECT_BINARY_DIR})")
ELSEIF (TD_LINUX_32)
IF (NOT TD_ARM)
EXIT ()
ENDIF ()
SET(TD_MAKE_INSTALL_SH "${TD_COMMUNITY_DIR}/packaging/tools/make_install.sh")
INSTALL(CODE "MESSAGE(\"make install script: ${TD_MAKE_INSTALL_SH}\")")
INSTALL(CODE "execute_process(COMMAND chmod 777 ${TD_MAKE_INSTALL_SH})")
INSTALL(CODE "execute_process(COMMAND ${TD_MAKE_INSTALL_SH} ${TD_COMMUNITY_DIR} ${PROJECT_BINARY_DIR})")
ELSEIF (TD_WINDOWS_64)
SET(CMAKE_INSTALL_PREFIX C:/TDengine)
IF (NOT TD_GODLL)
INSTALL(DIRECTORY ${TD_COMMUNITY_DIR}/src/connector/go DESTINATION connector)
INSTALL(DIRECTORY ${TD_COMMUNITY_DIR}/src/connector/grafana DESTINATION connector)
INSTALL(DIRECTORY ${TD_COMMUNITY_DIR}/src/connector/python DESTINATION connector)
INSTALL(DIRECTORY ${TD_COMMUNITY_DIR}/tests/examples DESTINATION .)
INSTALL(DIRECTORY ${TD_COMMUNITY_DIR}/packaging/cfg DESTINATION .)
INSTALL(FILES ${TD_COMMUNITY_DIR}/src/inc/taos.h DESTINATION include)
INSTALL(FILES ${LIBRARY_OUTPUT_PATH}/taos.lib DESTINATION driver)
INSTALL(FILES ${LIBRARY_OUTPUT_PATH}/taos.exp DESTINATION driver)
INSTALL(FILES ${LIBRARY_OUTPUT_PATH}/taos.dll DESTINATION driver)
INSTALL(FILES ${EXECUTABLE_OUTPUT_PATH}/taos.exe DESTINATION .)
#INSTALL(TARGETS taos RUNTIME DESTINATION driver)
#INSTALL(TARGETS shell RUNTIME DESTINATION .)
IF (TD_MVN_INSTALLED)
INSTALL(FILES ${LIBRARY_OUTPUT_PATH}/taos-jdbcdriver-1.0.2-dist.jar DESTINATION connector/jdbc)
ENDIF ()
ELSE ()
INSTALL(FILES ${LIBRARY_OUTPUT_PATH}/libtaos.dll DESTINATION driver)
INSTALL(FILES ${LIBRARY_OUTPUT_PATH}/libtaos.dll.a DESTINATION driver)
ENDIF ()
ELSEIF (TD_DARWIN_64)
SET(TD_MAKE_INSTALL_SH "${TD_COMMUNITY_DIR}/packaging/tools/make_install.sh")
INSTALL(CODE "MESSAGE(\"make install script: ${TD_MAKE_INSTALL_SH}\")")
INSTALL(CODE "execute_process(COMMAND chmod 777 ${TD_MAKE_INSTALL_SH})")
INSTALL(CODE "execute_process(COMMAND ${TD_MAKE_INSTALL_SH} ${TD_COMMUNITY_DIR} ${PROJECT_BINARY_DIR} Darwin)")
ENDIF ()
ENDIF ()
ADD_SUBDIRECTORY(deps)
ADD_SUBDIRECTORY(src)
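Pieced together from the added lines scattered through the diff above, the new top-level CMakeLists.txt that this refactor leaves behind is roughly the following sketch; the grouping and ordering are inferred, not copied verbatim from the commit:

CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
PROJECT(TDengine)
SET(TD_CLUSTER FALSE)
SET(TD_ACCOUNT FALSE)
SET(TD_GRANT FALSE)
SET(TD_COVER FALSE)
SET(TD_PAGMODE_LITE FALSE)
SET(TD_GODLL FALSE)
SET(TD_MEM_CHECK FALSE)
SET(TD_COMMUNITY_DIR ${PROJECT_SOURCE_DIR})
MESSAGE(STATUS "Community directory: " ${TD_COMMUNITY_DIR})
INCLUDE(cmake/input.inc)
INCLUDE(cmake/platform.inc)
INCLUDE(cmake/env.inc)
INCLUDE(cmake/define.inc)
INCLUDE(cmake/install.inc)
ADD_SUBDIRECTORY(deps)
ADD_SUBDIRECTORY(src)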
......
CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
PROJECT(TDengine)
IF (TD_CLUSTER)
ADD_DEFINITIONS(-D_CLUSTER)
ADD_DEFINITIONS(-DTSDB_REPLICA_MAX_NUM=3)
ELSE ()
ADD_DEFINITIONS(-DLITE)
ADD_DEFINITIONS(-DTSDB_REPLICA_MAX_NUM=1)
ENDIF ()
IF (TD_ACCOUNT)
ADD_DEFINITIONS(-D_ACCOUNT)
ENDIF ()
IF (TD_GRANT)
ADD_DEFINITIONS(-D_GRANT)
ENDIF ()
IF (TD_GODLL)
ADD_DEFINITIONS(-D_TD_GO_DLL_)
ENDIF ()
IF (TD_MEM_CHECK)
ADD_DEFINITIONS(-DTAOS_MEM_CHECK)
ENDIF ()
CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
PROJECT(TDengine)
SET(CMAKE_C_STANDARD 11)
SET(CMAKE_VERBOSE_MAKEFILE ON)
#set output directory
SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/build/lib)
SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/build/bin)
SET(TD_TESTS_OUTPUT_DIR ${PROJECT_BINARY_DIR}/test)
MESSAGE(STATUS "Operating system dependency directory: " ${TD_OS_DIR})
MESSAGE(STATUS "Project source directory: " ${PROJECT_SOURCE_DIR})
MESSAGE(STATUS "Project binary files output path: " ${PROJECT_BINARY_DIR})
MESSAGE(STATUS "Project executable files output path: " ${EXECUTABLE_OUTPUT_PATH})
MESSAGE(STATUS "Project library files output path: " ${LIBRARY_OUTPUT_PATH})
FIND_PROGRAM(TD_MVN_INSTALLED mvn)
IF (TD_MVN_INSTALLED)
MESSAGE(STATUS "MVN is installed and JDBC will be compiled")
ELSE ()
MESSAGE(STATUS "MVN is not installed and JDBC is not compiled")
ENDIF ()
#
# If need to set debug options
# 1.Generate debug version:
# mkdir debug; cd debug;
# cmake -DCMAKE_BUILD_TYPE=Debug ..
# 2.Generate release version:
# mkdir release; cd release;
# cmake -DCMAKE_BUILD_TYPE=Release ..
#
# Set compiler options
SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${COMMON_FLAGS} ${DEBUG_FLAGS}")
SET(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${COMMON_FLAGS} ${RELEASE_FLAGS}")
# Set c++ compiler options
# SET(COMMON_CXX_FLAGS "${COMMON_FLAGS} -std=c++11")
# SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${COMMON_CXX_FLAGS} ${DEBUG_FLAGS}")
# SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${COMMON_CXX_FLAGS} ${RELEASE_FLAGS}")
IF (${CMAKE_BUILD_TYPE} MATCHES "Debug")
MESSAGE(STATUS "Build Debug Version")
ELSEIF (${CMAKE_BUILD_TYPE} MATCHES "Release")
MESSAGE(STATUS "Build Release Version")
ELSE ()
IF (TD_WINDOWS_64)
SET(CMAKE_BUILD_TYPE "Release")
MESSAGE(STATUS "Build Release Version in Windows as default")
ELSE ()
SET(CMAKE_BUILD_TYPE "Debug")
MESSAGE(STATUS "Build Debug Version as default")
ENDIF()
ENDIF ()
\ No newline at end of file
CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
PROJECT(TDengine)
IF (${CLUSTER} MATCHES "true")
SET(TD_CLUSTER TRUE)
MESSAGE(STATUS "Build with cluster plugins")
ELSEIF (${CLUSTER} MATCHES "false")
SET(TD_CLUSTER FALSE)
MESSAGE(STATUS "Build without cluster plugins")
ENDIF ()
IF (${ACCOUNT} MATCHES "true")
SET(TD_ACCOUNT TRUE)
MESSAGE(STATUS "Build with account plugins")
ELSEIF (${ACCOUNT} MATCHES "false")
SET(TD_ACCOUNT FALSE)
MESSAGE(STATUS "Build without account plugins")
ENDIF ()
IF (${COVER} MATCHES "true")
SET(TD_COVER TRUE)
MESSAGE(STATUS "Build with test coverage")
ELSEIF (${COVER} MATCHES "false")
SET(TD_COVER FALSE)
MESSAGE(STATUS "Build without test coverage")
ENDIF ()
IF (${PAGMODE} MATCHES "lite")
SET(TD_PAGMODE_LITE TRUE)
MESSAGE(STATUS "Build with pagmode lite")
ENDIF ()
IF (${DLLTYPE} MATCHES "go")
SET(TD_GODLL TRUE)
MESSAGE(STATUS "input dll type: " ${DLLTYPE})
ENDIF ()
IF (${MEM_CHECK} MATCHES "true")
SET(TD_MEM_CHECK TRUE)
MESSAGE(STATUS "build with memory check")
ENDIF ()
\ No newline at end of file
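The option file above (presumably cmake/input.inc) maps -D cache entries given on the cmake command line onto the TD_* switches. Illustrative configure invocations, using only option names that appear in this commit, might look like the comment block below; each matched value simply flips the corresponding TD_* variable, while unmatched values leave the defaults from the top-level CMakeLists.txt in place:

# cmake -DCLUSTER=true -DACCOUNT=true ..
# cmake -DPAGMODE=lite -DMEM_CHECK=true ..
# cmake -DDLLTYPE=go -DCPUTYPE=aarch64 ..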
IF (TD_LINUX_64)
SET(TD_MAKE_INSTALL_SH "${TD_COMMUNITY_DIR}/packaging/tools/make_install.sh")
INSTALL(CODE "MESSAGE(\"make install script: ${TD_MAKE_INSTALL_SH}\")")
INSTALL(CODE "execute_process(COMMAND chmod 777 ${TD_MAKE_INSTALL_SH})")
INSTALL(CODE "execute_process(COMMAND ${TD_MAKE_INSTALL_SH} ${TD_COMMUNITY_DIR} ${PROJECT_BINARY_DIR})")
ELSEIF (TD_LINUX_32)
IF (NOT TD_ARM)
EXIT ()
ENDIF ()
SET(TD_MAKE_INSTALL_SH "${TD_COMMUNITY_DIR}/packaging/tools/make_install.sh")
INSTALL(CODE "MESSAGE(\"make install script: ${TD_MAKE_INSTALL_SH}\")")
INSTALL(CODE "execute_process(COMMAND chmod 777 ${TD_MAKE_INSTALL_SH})")
INSTALL(CODE "execute_process(COMMAND ${TD_MAKE_INSTALL_SH} ${TD_COMMUNITY_DIR} ${PROJECT_BINARY_DIR})")
ELSEIF (TD_WINDOWS_64)
SET(CMAKE_INSTALL_PREFIX C:/TDengine)
IF (NOT TD_GODLL)
INSTALL(DIRECTORY ${TD_COMMUNITY_DIR}/src/connector/go DESTINATION connector)
INSTALL(DIRECTORY ${TD_COMMUNITY_DIR}/src/connector/grafana DESTINATION connector)
INSTALL(DIRECTORY ${TD_COMMUNITY_DIR}/src/connector/python DESTINATION connector)
INSTALL(DIRECTORY ${TD_COMMUNITY_DIR}/tests/examples DESTINATION .)
INSTALL(DIRECTORY ${TD_COMMUNITY_DIR}/packaging/cfg DESTINATION .)
INSTALL(FILES ${TD_COMMUNITY_DIR}/src/inc/taos.h DESTINATION include)
INSTALL(FILES ${LIBRARY_OUTPUT_PATH}/taos.lib DESTINATION driver)
INSTALL(FILES ${LIBRARY_OUTPUT_PATH}/taos.exp DESTINATION driver)
INSTALL(FILES ${LIBRARY_OUTPUT_PATH}/taos.dll DESTINATION driver)
INSTALL(FILES ${EXECUTABLE_OUTPUT_PATH}/taos.exe DESTINATION .)
#INSTALL(TARGETS taos RUNTIME DESTINATION driver)
#INSTALL(TARGETS shell RUNTIME DESTINATION .)
IF (TD_MVN_INSTALLED)
INSTALL(FILES ${LIBRARY_OUTPUT_PATH}/taos-jdbcdriver-1.0.2-dist.jar DESTINATION connector/jdbc)
ENDIF ()
ELSE ()
INSTALL(FILES ${LIBRARY_OUTPUT_PATH}/libtaos.dll DESTINATION driver)
INSTALL(FILES ${LIBRARY_OUTPUT_PATH}/libtaos.dll.a DESTINATION driver)
ENDIF ()
ELSEIF (TD_DARWIN_64)
SET(TD_MAKE_INSTALL_SH "${TD_COMMUNITY_DIR}/packaging/tools/make_install.sh")
INSTALL(CODE "MESSAGE(\"make install script: ${TD_MAKE_INSTALL_SH}\")")
INSTALL(CODE "execute_process(COMMAND chmod 777 ${TD_MAKE_INSTALL_SH})")
INSTALL(CODE "execute_process(COMMAND ${TD_MAKE_INSTALL_SH} ${TD_COMMUNITY_DIR} ${PROJECT_BINARY_DIR} Darwin)")
ENDIF ()
\ No newline at end of file
CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
PROJECT(TDengine)
#
# If it is a Windows operating system
# 1.Use command line tool of VS2013 or higher version
# mkdir build; cd build;
# cmake -G "NMake Makefiles" ..
# nmake install
# 2.Use the VS development interface tool
# mkdir build; cd build;
# cmake -A x64 ..
# open the file named TDengine.sln
#
# Set macro definitions according to os platform
SET(TD_LINUX_64 FALSE)
SET(TD_LINUX_32 FALSE)
SET(TD_ARM FALSE)
SET(TD_ARM_64 FALSE)
SET(TD_ARM_32 FALSE)
SET(TD_MIPS FALSE)
SET(TD_MIPS_64 FALSE)
SET(TD_MIPS_32 FALSE)
SET(TD_DARWIN_64 FALSE)
SET(TD_WINDOWS_64 FALSE)
# if generate ARM version:
# cmake -DCPUTYPE=aarch32 .. or cmake -DCPUTYPE=aarch64
IF (${CPUTYPE} MATCHES "aarch32")
SET(TD_ARM TRUE)
SET(TD_ARM_32 TRUE)
SET(TD_PAGMODE_LITE TRUE)
ADD_DEFINITIONS(-D_TD_ARM_)
ADD_DEFINITIONS(-D_TD_ARM_32_)
ELSEIF (${CPUTYPE} MATCHES "aarch64")
SET(TD_ARM TRUE)
SET(TD_ARM_64 TRUE)
ADD_DEFINITIONS(-D_TD_ARM_)
ADD_DEFINITIONS(-D_TD_ARM_64_)
ELSEIF (${CPUTYPE} MATCHES "mips64")
SET(TD_MIPS TRUE)
SET(TD_MIPS_64 TRUE)
ADD_DEFINITIONS(-D_TD_MIPS_)
ADD_DEFINITIONS(-D_TD_MIPS_64_)
ELSEIF (${CPUTYPE} MATCHES "x64")
MESSAGE(STATUS "input cpuType: " ${CPUTYPE})
ELSEIF (${CPUTYPE} MATCHES "x86")
MESSAGE(STATUS "input cpuType: " ${CPUTYPE})
ELSE ()
MESSAGE(STATUS "input cpuType: " ${CPUTYPE})
ENDIF ()
#
# Get OS information and store in variable TD_OS_INFO.
#
execute_process(COMMAND chmod 777 ${TD_COMMUNITY_DIR}/packaging/tools/get_os.sh)
execute_process(COMMAND ${TD_COMMUNITY_DIR}/packaging/tools/get_os.sh "" OUTPUT_VARIABLE TD_OS_INFO)
MESSAGE(STATUS "The current os is " ${TD_OS_INFO})
IF (${CMAKE_SYSTEM_NAME} MATCHES "Linux")
IF (${CMAKE_SIZEOF_VOID_P} MATCHES 8)
SET(TD_LINUX_64 TRUE)
SET(TD_OS_DIR ${TD_COMMUNITY_DIR}/src/os/linux)
ADD_DEFINITIONS(-D_M_X64)
MESSAGE(STATUS "The current platform is Linux 64-bit")
ELSEIF (${CMAKE_SIZEOF_VOID_P} MATCHES 4)
IF (TD_ARM)
SET(TD_LINUX_32 TRUE)
SET(TD_OS_DIR ${TD_COMMUNITY_DIR}/src/os/linux)
#ADD_DEFINITIONS(-D_M_IX86)
MESSAGE(STATUS "The current platform is Linux 32-bit")
ELSE ()
MESSAGE(FATAL_ERROR "The current platform is Linux 32-bit, but no ARM not supported yet")
EXIT ()
ENDIF ()
ELSE ()
MESSAGE(FATAL_ERROR "The current platform is Linux neither 32-bit nor 64-bit, not supported yet")
EXIT ()
ENDIF ()
ELSEIF (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
IF (${CMAKE_SIZEOF_VOID_P} MATCHES 8)
SET(TD_DARWIN_64 TRUE)
SET(TD_OS_DIR ${TD_COMMUNITY_DIR}/src/os/darwin)
MESSAGE(STATUS "The current platform is Darwin 64-bit")
ELSE ()
MESSAGE(FATAL_ERROR "The current platform is Darwin 32-bit, not supported yet")
EXIT ()
ENDIF ()
ELSEIF (${CMAKE_SYSTEM_NAME} MATCHES "Windows")
IF (${CMAKE_SIZEOF_VOID_P} MATCHES 8)
SET(TD_WINDOWS_64 TRUE)
SET(TD_OS_DIR ${TD_COMMUNITY_DIR}/src/os/windows)
ADD_DEFINITIONS(-D_M_X64)
MESSAGE(STATUS "The current platform is Windows 64-bit")
ELSE ()
MESSAGE(FATAL_ERROR "The current platform is Windows 32-bit, not supported yet")
EXIT ()
ENDIF ()
ELSE()
MESSAGE(FATAL_ERROR "The current platform is not Linux/Darwin/Windows, stop compile")
EXIT ()
ENDIF ()
IF (TD_LINUX_64)
SET(DEBUG_FLAGS "-O0 -DDEBUG")
SET(RELEASE_FLAGS "-O0")
IF (NOT TD_ARM)
IF (${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
SET(COMMON_FLAGS "-std=gnu99 -Wall -fPIC -malign-double -g -msse4.2 -D_FILE_OFFSET_BITS=64 -D_LARGE_FILE")
ELSE ()
SET(COMMON_FLAGS "-std=gnu99 -Wall -fPIC -malign-double -g -malign-stringops -msse4.2 -D_FILE_OFFSET_BITS=64 -D_LARGE_FILE")
ENDIF ()
ELSE ()
SET(COMMON_FLAGS "-std=gnu99 -Wall -fPIC -g -fsigned-char -fpack-struct=8 -D_FILE_OFFSET_BITS=64 -D_LARGE_FILE")
ENDIF ()
ADD_DEFINITIONS(-DLINUX)
ADD_DEFINITIONS(-D_REENTRANT -D__USE_POSIX -D_LIBC_REENTRANT)
IF (${TD_OS_INFO} MATCHES "Alpine")
MESSAGE(STATUS "The current OS is Alpine, append extra flags")
SET(COMMON_FLAGS "${COMMON_FLAGS} -largp")
link_libraries(/usr/lib/libargp.a)
ADD_DEFINITIONS(-D_ALPINE)
ENDIF ()
ELSEIF (TD_LINUX_32)
IF (NOT TD_ARM)
EXIT ()
ENDIF ()
SET(DEBUG_FLAGS "-O0 -DDEBUG")
SET(RELEASE_FLAGS "-O0")
SET(COMMON_FLAGS "-std=gnu99 -Wall -fPIC -g -fsigned-char -munaligned-access -fpack-struct=8 -latomic -D_FILE_OFFSET_BITS=64 -D_LARGE_FILE")
ADD_DEFINITIONS(-DLINUX)
ADD_DEFINITIONS(-D_REENTRANT -D__USE_POSIX -D_LIBC_REENTRANT)
ADD_DEFINITIONS(-DUSE_LIBICONV)
IF (${TD_OS_INFO} MATCHES "Alpine")
MESSAGE(STATUS "The current OS is Alpine, add extra flags")
SET(COMMON_FLAGS "${COMMON_FLAGS} -largp")
link_library(/usr/lib/libargp.a)
ADD_DEFINITIONS(-D_ALPINE)
ENDIF ()
ELSEIF (TD_WINDOWS_64)
SET(CMAKE_GENERATOR "NMake Makefiles" CACHE INTERNAL "" FORCE)
IF (NOT TD_GODLL)
SET(COMMON_FLAGS "/nologo /WX- /Oi /Oy- /Gm- /EHsc /MT /GS /Gy /fp:precise /Zc:wchar_t /Zc:forScope /Gd /errorReport:prompt /analyze-")
SET(DEBUG_FLAGS "/Zi /W3 /GL")
SET(RELEASE_FLAGS "/W0 /GL")
ENDIF ()
ADD_DEFINITIONS(-DWINDOWS)
ADD_DEFINITIONS(-D__CLEANUP_C)
ADD_DEFINITIONS(-DPTW32_STATIC_LIB)
ADD_DEFINITIONS(-DPTW32_BUILD)
ADD_DEFINITIONS(-D_MBCS -D_CRT_SECURE_NO_DEPRECATE -D_CRT_NONSTDC_NO_DEPRECATE)
ELSEIF (TD_DARWIN_64)
SET(COMMON_FLAGS "-std=gnu99 -Wall -fPIC -malign-double -g -msse4.2 -D_FILE_OFFSET_BITS=64 -D_LARGE_FILE")
SET(DEBUG_FLAGS "-O0 -DDEBUG")
SET(RELEASE_FLAGS "-O0")
ADD_DEFINITIONS(-DDARWIN)
ADD_DEFINITIONS(-D_REENTRANT -D__USE_POSIX -D_LIBC_REENTRANT)
ELSE ()
MESSAGE(FATAL_ERROR "The current platform is not support yet, stop compile")
EXIT ()
ENDIF ()
\ No newline at end of file
@@ -15,9 +15,16 @@ IF ((TD_LINUX_64) OR (TD_LINUX_32 AND TD_ARM))
ADD_EXECUTABLE(taosd ${SRC})
TARGET_LINK_LIBRARIES(taosd mnode taos_static monitor http tsdb)
#IF (TD_CLUSTER)
# TARGET_LINK_LIBRARIES(taosd dcluster)
#ENDIF ()
IF (TD_ACCOUNT)
TARGET_LINK_LIBRARIES(taosd account)
ENDIF ()
IF (TD_GRANT)
TARGET_LINK_LIBRARIES(taosd grant)
ENDIF ()
IF (TD_CLUSTER)
TARGET_LINK_LIBRARIES(taosd cluster)
ENDIF ()
SET(PREPARE_ENV_CMD "prepare_env_cmd")
SET(PREPARE_ENV_TARGET "prepare_env_target")
......
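In the taosd link rules above, the optional account, grant, and cluster libraries are linked only when the matching TD_* switch is on. A hypothetical extra component would follow the same pattern; TD_FOO and the foo library are placeholders invented here for illustration, not part of this commit:

IF (TD_FOO)
TARGET_LINK_LIBRARIES(taosd foo)
ENDIF ()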
@@ -28,20 +28,9 @@
#include "dnodeRead.h"
#include "dnodeShell.h"
#include "dnodeWrite.h"
#ifdef CLUSTER
#include "account.h"
#include "admin.h"
#include "balance.h"
#include "cluster.h"
#include "grant.h"
#include "mpeer.h"
#include "storage.h"
#include "vpeer.h"
#endif
static int32_t dnodeInitSystem();
static int32_t dnodeInitStorage();
static void dnodeInitPlugins();
static void dnodeCleanupStorage();
static void dnodeCleanUpSystem();
static void dnodeSetRunStatus(SDnodeRunStatus status);
@@ -51,8 +40,6 @@ static SDnodeRunStatus tsDnodeRunStatus = TSDB_DNODE_RUN_STATUS_STOPPED;
void (*dnodeParseParameterKFp)() = NULL;
int32_t main(int32_t argc, char *argv[]) {
dnodeInitPlugins();
// Set global configuration file
for (int32_t i = 1; i < argc; ++i) {
if (strcmp(argv[i], "-c") == 0) {
@@ -244,15 +231,3 @@ static int32_t dnodeInitStorage() {
}
static void dnodeCleanupStorage() {}
static void dnodeInitPlugins() {
#ifdef CLUSTER
// acctInit();
// adminInit();
// balanceInit();
// clusterInit();
// grantInit();
// mpeerInit();
// storageInit();
#endif
}
@@ -38,6 +38,10 @@ extern "C" {
#include "ttimer.h"
#include "tutil.h"
struct _vg_obj;
struct _db_obj;
struct _acctObj;
typedef struct {
int32_t mnodeId;
uint32_t privateIp;
@@ -103,8 +107,6 @@ typedef struct {
int8_t dirty;
} STableInfo;
struct _vg_obj;
typedef struct SSuperTableObj {
STableInfo info;
uint64_t uid;
@@ -137,8 +139,6 @@
SSuperTableObj *superTable;
} SChildTableObj;
struct _db_obj;
typedef struct _vg_obj {
uint32_t vgId;
char dbName[TSDB_DB_NAME_LEN + 1];
@@ -170,10 +170,9 @@ typedef struct _db_obj {
int32_t numOfSuperTables;
SVgObj *pHead;
SVgObj *pTail;
struct _acctObj *pAcct;
} SDbObj;
struct _acctObj;
typedef struct _user_obj {
char user[TSDB_USER_LEN + 1];
char pass[TSDB_KEY_LEN + 1];
@@ -213,7 +212,8 @@ typedef struct _acctObj {
SAcctCfg cfg;
int32_t acctId;
int64_t createdTime;
int8_t reserved[15];
int8_t dirty;
int8_t reserved[14];
int8_t updateEnd[1];
SAcctInfo acctInfo;
SDbObj * pHead;
......
@@ -14,10 +14,6 @@ IF ((TD_LINUX_64) OR (TD_LINUX_32 AND TD_ARM))
ADD_LIBRARY(mnode ${SRC})
TARGET_LINK_LIBRARIES(mnode trpc tutil pthread)
IF (TD_CLUSTER)
TARGET_LINK_LIBRARIES(mnode)
ENDIF ()
ENDIF ()
@@ -21,18 +21,21 @@ extern "C" {
#endif
#include "mnode.h"
int32_t mgmtInitAccts();
void mgmtCleanUpAccts();
SAcctObj *mgmtGetAcct(char *acctName);
int32_t mgmtCheckUserLimit(SAcctObj *pAcct);
int32_t mgmtCheckDbLimit(SAcctObj *pAcct);
int32_t mgmtCheckTableLimit(SAcctObj *pAcct, int32_t numOfTimeSeries);
int32_t mgmtAddDbIntoAcct(SAcctObj *pAcct, SDbObj *pDb);
int32_t mgmtRemoveDbFromAcct(SAcctObj *pAcct, SDbObj *pDb);
int32_t mgmtAddUserIntoAcct(SAcctObj *pAcct, SUserObj *pUser);
int32_t mgmtRemoveUserFromAcct(SAcctObj *pAcct, SUserObj *pUser);
typedef enum {
TSDB_ACCT_USER,
TSDB_ACCT_DB,
TSDB_ACCT_TABLE
} EAcctGrantType;
int32_t acctInit();
void acctCleanUp();
SAcctObj *acctGetAcct(char *acctName);
int32_t acctCheck(SAcctObj *pAcct, EAcctGrantType type);
int32_t acctAddDb(SAcctObj *pAcct, SDbObj *pDb);
int32_t acctRemoveDb(SAcctObj *pAcct, SDbObj *pDb);
int32_t acctAddUser(SAcctObj *pAcct, SUserObj *pUser);
int32_t acctRemoveUser(SAcctObj *pAcct, SUserObj *pUser);
#ifdef __cplusplus
}
......
@@ -28,6 +28,7 @@ void mgmtCleanUpDbs();
SDbObj *mgmtGetDb(char *db);
SDbObj *mgmtGetDbByTableId(char *db);
bool mgmtCheckIsMonitorDB(char *db, char *monitordb);
void mgmtDropAllDbs(SAcctObj *pAcct);
// util func
void mgmtAddSuperTableIntoDb(SDbObj *pDb);
......
@@ -24,19 +24,9 @@ extern "C" {
int32_t mgmtInitDnodes();
void mgmtCleanUpDnodes();
int32_t mgmtGetDnodesNum();
int32_t mgmtUpdateDnode(SDnodeObj *pDnode);
SDnodeObj* mgmtGetDnode(int32_t dnodeId);
SDnodeObj* mgmtGetDnodeByIp(uint32_t ip);
bool mgmtCheckDnodeInRemoveState(SDnodeObj *pDnode);
bool mgmtCheckDnodeInOfflineState(SDnodeObj *pDnode);
bool mgmtCheckModuleInDnode(SDnodeObj *pDnode, int32_t moduleType);
void mgmtSetDnodeUnRemove(SDnodeObj *pDnode);
void mgmtSetDnodeMaxVnodes(SDnodeObj *pDnode);
void mgmtCalcNumOfFreeVnodes(SDnodeObj *pDnode);
void mgmtSetDnodeVgid(SVnodeGid vnodeGid[], int32_t numOfVnodes, int32_t vgId);
void mgmtUnSetDnodeVgid(SVnodeGid vnodeGid[], int32_t numOfVnodes);
#ifdef __cplusplus
}
#endif
......
@@ -19,14 +19,30 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "mnode.h"
bool mgmtCheckExpired();
void mgmtAddTimeSeries(SAcctObj *pAcct, uint32_t timeSeriesNum);
void mgmtRestoreTimeSeries(SAcctObj *pAcct, uint32_t timeseries);
int32_t mgmtCheckTimeSeries(uint32_t timeseries);
int32_t mgmtCheckUserGrant();
int32_t mgmtCheckDbGrant();
typedef enum {
TSDB_GRANT_ALL,
TSDB_GRANT_TIME,
TSDB_GRANT_USER,
TSDB_GRANT_DB,
TSDB_GRANT_TIMESERIES,
TSDB_GRANT_DNODE,
TSDB_GRANT_ACCT,
TSDB_GRANT_STORAGE,
TSDB_GRANT_SPEED,
TSDB_GRANT_QUERY_TIME,
TSDB_GRANT_CONNS,
TSDB_GRANT_STREAMS,
TSDB_GRANT_CPU_CORES,
} EGrantType;
int32_t grantInit();
void grantCleanUp();
void grantParseParameter();
int32_t grantCheck(EGrantType grant);
void grantReset(EGrantType grant, uint64_t value);
void grantAdd(EGrantType grant, uint64_t value);
void grantRestore(EGrantType grant, uint64_t value);
#ifdef __cplusplus
}
......
@@ -63,6 +63,7 @@ int32_t sdbUpdateRow(SSdbOperDesc *pOper);
void *sdbGetRow(void *handle, void *key);
void *sdbFetchRow(void *handle, void *pNode, void **ppRow);
int64_t sdbGetNumOfRows(void *handle);
int64_t sdbGetId(void *handle);
uint64_t sdbGetVersion();
#ifdef __cplusplus #ifdef __cplusplus
......
@@ -25,6 +25,8 @@ int32_t mgmtInitUsers();
void mgmtCleanUpUsers();
SUserObj *mgmtGetUser(char *name);
SUserObj *mgmtGetUserFromConn(void *pConn, bool *usePublicIp);
int32_t mgmtCreateUser(SAcctObj *pAcct, char *name, char *pass);
void mgmtDropAllUsers(SAcctObj *pAcct);
#ifdef __cplusplus
}
......
@@ -15,21 +15,29 @@
#define _DEFAULT_SOURCE
#include "os.h"
#include "taoserror.h"
#include "mnode.h"
#include "mgmtAcct.h"
static SAcctObj tsAcctObj;
int32_t (*mgmtInitAcctsFp)() = NULL;
void (*mgmtCleanUpAcctsFp)() = NULL;
SAcctObj *(*mgmtGetAcctFp)(char *acctName) = NULL;
int32_t (*mgmtCheckUserLimitFp)(SAcctObj *pAcct) = NULL;
int32_t (*mgmtCheckDbLimitFp)(SAcctObj *pAcct) = NULL;
int32_t (*mgmtCheckTableLimitFp)(SAcctObj *pAcct, int32_t numOfTimeSeries) = NULL;
#ifndef _ACCOUNT
static SAcctObj tsAcctObj = {0};
int32_t acctInit() {
tsAcctObj.acctId = 0;
strcpy(tsAcctObj.user, "root");
return TSDB_CODE_SUCCESS;
}
void acctCleanUp() {}
SAcctObj *acctGetAcct(char *acctName) { return &tsAcctObj; }
int32_t acctCheck(SAcctObj *pAcct, EAcctGrantType type) { return TSDB_CODE_SUCCESS; }
#endif
int32_t mgmtAddDbIntoAcct(SAcctObj *pAcct, SDbObj *pDb) {
int32_t acctAddDb(SAcctObj *pAcct, SDbObj *pDb) {
pthread_mutex_lock(&pAcct->mutex);
pDb->next = pAcct->pHead;
pDb->prev = NULL;
pDb->pAcct = pAcct;
if (pAcct->pHead) {
pAcct->pHead->prev = pDb;
@@ -42,7 +50,7 @@ int32_t mgmtAddDbIntoAcct(SAcctObj *pAcct, SDbObj *pDb) {
return 0;
}
int32_t mgmtRemoveDbFromAcct(SAcctObj *pAcct, SDbObj *pDb) {
int32_t acctRemoveDb(SAcctObj *pAcct, SDbObj *pDb) {
pthread_mutex_lock(&pAcct->mutex);
if (pDb->prev) {
pDb->prev->next = pDb->next;
@@ -62,7 +70,7 @@ int32_t mgmtRemoveDbFromAcct(SAcctObj *pAcct, SDbObj *pDb) {
return 0;
}
int32_t mgmtAddUserIntoAcct(SAcctObj *pAcct, SUserObj *pUser) {
int32_t acctAddUser(SAcctObj *pAcct, SUserObj *pUser) {
pthread_mutex_lock(&pAcct->mutex);
pUser->next = pAcct->pUser;
pUser->prev = NULL;
@@ -79,7 +87,7 @@ int32_t mgmtAddUserIntoAcct(SAcctObj *pAcct, SUserObj *pUser) {
return 0;
}
int32_t mgmtRemoveUserFromAcct(SAcctObj *pAcct, SUserObj *pUser) {
int32_t acctRemoveUser(SAcctObj *pAcct, SUserObj *pUser) {
pthread_mutex_lock(&pAcct->mutex);
if (pUser->prev) {
pUser->prev->next = pUser->next;
@@ -88,7 +96,7 @@ int32_t mgmtRemoveUserFromAcct(SAcctObj *pAcct, SUserObj *pUser) {
if (pUser->next) {
pUser->next->prev = pUser->prev;
}
if (pUser->prev == NULL) {
pAcct->pUser = pUser->next;
}
@@ -97,51 +105,4 @@ int32_t mgmtRemoveUserFromAcct(SAcctObj *pAcct, SUserObj *pUser) {
pthread_mutex_unlock(&pAcct->mutex);
return 0;
}
int32_t mgmtInitAccts() {
if (mgmtInitAcctsFp) {
return (*mgmtInitAcctsFp)();
} else {
tsAcctObj.acctId = 0;
strcpy(tsAcctObj.user, "root");
return 0;
}
}
SAcctObj *mgmtGetAcct(char *acctName) {
if (mgmtGetAcctFp) {
return (*mgmtGetAcctFp)(acctName);
} else {
return &tsAcctObj;
}
}
void mgmtCleanUpAccts() {
if (mgmtCleanUpAcctsFp) {
(*mgmtCleanUpAcctsFp)();
}
}
int32_t mgmtCheckUserLimit(SAcctObj *pAcct) {
if (mgmtCheckUserLimitFp) {
return (*mgmtCheckUserLimitFp)(pAcct);
}
return 0;
}
int32_t mgmtCheckDbLimit(SAcctObj *pAcct) {
if (mgmtCheckDbLimitFp) {
return (*mgmtCheckDbLimitFp)(pAcct);
} else {
return 0;
}
}
int32_t mgmtCheckTableLimit(SAcctObj *pAcct, int32_t numOfTimeSeries) {
if (mgmtCheckTableLimitFp) {
return (*mgmtCheckTableLimitFp)(pAcct, numOfTimeSeries);
} else {
return 0;
}
}
\ No newline at end of file
@@ -48,7 +48,7 @@
#include "mgmtVgroup.h"
#include "mgmtUser.h"
static void *tsChildTableSdb;
void *tsChildTableSdb;
static int32_t tsChildTableUpdateSize;
static void mgmtProcessMultiTableMetaMsg(SQueuedMsg *queueMsg);
static void mgmtProcessCreateTableRsp(SRpcMsg *rpcMsg);
@@ -84,7 +84,7 @@ static int32_t mgmtChildTableActionInsert(SSdbOperDesc *pOper) {
return TSDB_CODE_INVALID_DB;
}
SAcctObj *pAcct = mgmtGetAcct(pDb->cfg.acct);
SAcctObj *pAcct = acctGetAcct(pDb->cfg.acct);
if (pAcct == NULL) {
mError("ctable:%s, account:%s not exists", pTable->info.tableId, pDb->cfg.acct);
return TSDB_CODE_INVALID_ACCT;
@@ -93,9 +93,11 @@
if (pTable->info.type == TSDB_CHILD_TABLE) {
pTable->superTable = mgmtGetSuperTable(pTable->superTableId);
pTable->superTable->numOfTables++;
mgmtAddTimeSeries(pAcct, pTable->superTable->numOfColumns - 1);
grantAdd(TSDB_GRANT_TIMESERIES, pTable->superTable->numOfColumns - 1);
pAcct->acctInfo.numOfTimeSeries += (pTable->superTable->numOfColumns - 1);
} else {
mgmtAddTimeSeries(pAcct, pTable->numOfColumns - 1);
grantAdd(TSDB_GRANT_TIMESERIES, pTable->numOfColumns - 1);
pAcct->acctInfo.numOfTimeSeries += (pTable->numOfColumns - 1);
}
mgmtAddTableIntoDb(pDb);
mgmtAddTableIntoVgroup(pVgroup, pTable);
@@ -120,17 +122,19 @@ static int32_t mgmtChildTableActionDelete(SSdbOperDesc *pOper) {
return TSDB_CODE_INVALID_DB;
}
SAcctObj *pAcct = mgmtGetAcct(pDb->cfg.acct);
SAcctObj *pAcct = acctGetAcct(pDb->cfg.acct);
if (pAcct == NULL) {
mError("ctable:%s, account:%s not exists", pTable->info.tableId, pDb->cfg.acct);
return TSDB_CODE_INVALID_ACCT;
}
if (pTable->info.type == TSDB_CHILD_TABLE) {
mgmtRestoreTimeSeries(pAcct, pTable->superTable->numOfColumns - 1);
grantRestore(TSDB_GRANT_TIMESERIES, pTable->superTable->numOfColumns - 1);
pAcct->acctInfo.numOfTimeSeries -= (pTable->superTable->numOfColumns - 1);
pTable->superTable->numOfTables--;
} else {
mgmtRestoreTimeSeries(pAcct, pTable->numOfColumns - 1);
grantRestore(TSDB_GRANT_TIMESERIES, pTable->numOfColumns - 1);
pAcct->acctInfo.numOfTimeSeries -= (pTable->numOfColumns - 1);
}
mgmtRemoveTableFromDb(pDb);
mgmtRemoveTableFromVgroup(pVgroup, pTable);
@@ -464,9 +468,9 @@ static SChildTableObj* mgmtDoCreateChildTable(SCMCreateTableMsg *pCreate, SVgObj
void mgmtCreateChildTable(SQueuedMsg *pMsg) {
SCMCreateTableMsg *pCreate = pMsg->pCont;
int32_t code = mgmtCheckTimeSeries(htons(pCreate->numOfColumns));
int32_t code = grantCheck(TSDB_GRANT_TIMESERIES);
if (code != TSDB_CODE_SUCCESS) {
mError("table:%s, failed to create, timeseries exceed the limit", pCreate->tableId);
mError("table:%s, failed to create, grant not", pCreate->tableId);
mgmtSendSimpleResp(pMsg->thandle, code);
return;
}
@@ -634,7 +638,7 @@ static int32_t mgmtAddNormalTableColumn(SChildTableObj *pTable, SSchema schema[]
return TSDB_CODE_APP_ERROR;
}
SAcctObj *pAcct = mgmtGetAcct(pDb->cfg.acct);
SAcctObj *pAcct = acctGetAcct(pDb->cfg.acct);
if (pAcct == NULL) {
mError("DB: %s not belongs to andy account", pDb->name);
return TSDB_CODE_APP_ERROR;
@@ -677,7 +681,7 @@ static int32_t mgmtDropNormalTableColumnByName(SChildTableObj *pTable, char *col
return TSDB_CODE_APP_ERROR;
}
SAcctObj *pAcct = mgmtGetAcct(pDb->cfg.acct);
SAcctObj *pAcct = acctGetAcct(pDb->cfg.acct);
if (pAcct == NULL) {
mError("DB: %s not belongs to any account", pDb->name);
return TSDB_CODE_APP_ERROR;
......
@@ -35,7 +35,7 @@
#include "mgmtUser.h"
#include "mgmtVgroup.h"
static void *tsDbSdb = NULL;
void * tsDbSdb = NULL;
static int32_t tsDbUpdateSize;
static int32_t mgmtCreateDb(SAcctObj *pAcct, SCMCreateDbMsg *pCreate);
@@ -54,7 +54,7 @@ static int32_t mgmtDbActionDestroy(SSdbOperDesc *pOper) {
static int32_t mgmtDbActionInsert(SSdbOperDesc *pOper) {
SDbObj *pDb = pOper->pObj;
SAcctObj *pAcct = mgmtGetAcct(pDb->cfg.acct);
SAcctObj *pAcct = acctGetAcct(pDb->cfg.acct);
pDb->pHead = NULL;
pDb->pTail = NULL;
@@ -65,7 +65,7 @@
pDb->numOfSuperTables = 0;
if (pAcct != NULL) {
mgmtAddDbIntoAcct(pAcct, pDb);
acctAddDb(pAcct, pDb);
}
else {
mError("db:%s, acct:%s info not exist in sdb", pDb->name, pDb->cfg.acct);
@@ -77,9 +77,9 @@
static int32_t mgmtDbActionDelete(SSdbOperDesc *pOper) {
SDbObj *pDb = pOper->pObj;
SAcctObj *pAcct = mgmtGetAcct(pDb->cfg.acct);
SAcctObj *pAcct = acctGetAcct(pDb->cfg.acct);
mgmtRemoveDbFromAcct(pAcct, pDb);
acctRemoveDb(pAcct, pDb);
mgmtDropAllChildTables(pDb);
mgmtDropAllSuperTables(pDb);
mgmtDropAllVgroups(pDb);
@@ -277,7 +277,7 @@ static int32_t mgmtCheckDbParams(SCMCreateDbMsg *pCreate) {
}
static int32_t mgmtCreateDb(SAcctObj *pAcct, SCMCreateDbMsg *pCreate) {
int32_t code = mgmtCheckDbLimit(pAcct);
int32_t code = acctCheck(pAcct, TSDB_ACCT_DB);
if (code != 0) {
return code;
}
@@ -292,7 +292,7 @@ static int32_t mgmtCreateDb(SAcctObj *pAcct, SCMCreateDbMsg *pCreate) {
assert(pCreate->daysToKeep1 <= pCreate->daysToKeep2 && pCreate->daysToKeep2 <= pCreate->daysToKeep);
code = mgmtCheckDbGrant();
code = grantCheck(TSDB_GRANT_DB);
if (code != 0) {
return code;
}
@@ -692,7 +692,7 @@ static void mgmtProcessCreateDbMsg(SQueuedMsg *pMsg) {
pCreate->rowsInFileBlock = htonl(pCreate->rowsInFileBlock);
int32_t code;
if (mgmtCheckExpired()) {
if (grantCheck(TSDB_GRANT_TIME) != TSDB_CODE_SUCCESS) {
code = TSDB_CODE_GRANT_EXPIRED;
} else if (!pMsg->pUser->writeAuth) {
code = TSDB_CODE_NO_RIGHTS;
@@ -771,7 +771,7 @@ static void mgmtProcessAlterDbMsg(SQueuedMsg *pMsg) {
SCMAlterDbMsg *pAlter = pMsg->pCont;
mTrace("db:%s, alter db msg is received from thandle:%p", pAlter->db, pMsg->thandle);
if (mgmtCheckExpired()) {
if (grantCheck(TSDB_GRANT_TIME) != TSDB_CODE_SUCCESS) {
mError("db:%s, failed to alter, grant expired", pAlter->db);
mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_GRANT_EXPIRED);
return;
@@ -842,7 +842,7 @@ static void mgmtProcessDropDbMsg(SQueuedMsg *pMsg) {
SCMDropDbMsg *pDrop = pMsg->pCont;
mTrace("db:%s, drop db msg is received from thandle:%p", pDrop->db, pMsg->thandle);
if (mgmtCheckExpired()) {
if (grantCheck(TSDB_GRANT_TIME) != TSDB_CODE_SUCCESS) {
mError("db:%s, failed to drop, grant expired", pDrop->db);
mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_GRANT_EXPIRED);
return;
@@ -899,3 +899,20 @@ static void mgmtProcessDropDbMsg(SQueuedMsg *pMsg) {
newMsg->ahandle = pDb;
taosTmrReset(mgmtDropDb, 10, newMsg, tsMgmtTmr, &tmpTmr);
}
void mgmtDropAllDbs(SAcctObj *pAcct) {
int32_t numOfDbs = 0;
SDbObj *pDb = NULL;
while (1) {
void *pNode = sdbFetchRow(tsDbSdb, pNode, (void **)&pDb);
if (pDb == NULL) break;
if (pDb->pAcct == pAcct) {
mgmtSetDbDirty(pDb);
numOfDbs++;
}
}
mTrace("acct:%s, all dbs is is set dirty", pAcct->acctId, numOfDbs);
}
\ No newline at end of file
@@ -26,454 +26,76 @@
#include "mgmtUser.h"
#include "mgmtVgroup.h"
int32_t (*mgmtInitDnodesFp)() = NULL;
void (*mgmtCleanUpDnodesFp)() = NULL;
SDnodeObj *(*mgmtGetDnodeFp)(uint32_t ip) = NULL;
SDnodeObj *(*mgmtGetDnodeByIpFp)(int32_t dnodeId) = NULL;
int32_t (*mgmtGetDnodesNumFp)() = NULL;
int32_t (*mgmtUpdateDnodeFp)(SDnodeObj *pDnode) = NULL;
void * (*mgmtGetNextDnodeFp)(SShowObj *pShow, SDnodeObj **pDnode) = NULL;
void (*mgmtSetDnodeUnRemoveFp)(SDnodeObj *pDnode) = NULL;
static SDnodeObj tsDnodeObj = {0};
static void * mgmtGetNextDnode(SShowObj *pShow, SDnodeObj **pDnode);
static bool mgmtCheckConfigShow(SGlobalConfig *cfg);
static int32_t mgmtGetModuleMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn);
static int32_t mgmtRetrieveModules(SShowObj *pShow, char *data, int32_t rows, void *pConn);
static int32_t mgmtGetConfigMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn);
static int32_t mgmtRetrieveConfigs(SShowObj *pShow, char *data, int32_t rows, void *pConn);
static int32_t mgmtGetVnodeMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn);
static int32_t mgmtRetrieveVnodes(SShowObj *pShow, char *data, int32_t rows, void *pConn);
static void mgmtProcessCfgDnodeMsg(SQueuedMsg *pMsg);
static void mgmtProcessCfgDnodeMsgRsp(SRpcMsg *rpcMsg) ;
static void mgmtProcessDnodeStatusMsg(SRpcMsg *rpcMsg);
extern int32_t clusterInit();
extern void clusterCleanUp();
extern int32_t clusterGetDnodesNum();
extern SDnodeObj* clusterGetDnode(int32_t dnodeId);
extern SDnodeObj* clusterGetDnodeByIp(uint32_t ip);
static SDnodeObj tsDnodeObj = {0};
void mgmtSetDnodeMaxVnodes(SDnodeObj *pDnode) {
int32_t maxVnodes = pDnode->numOfCores * tsNumOfVnodesPerCore;
maxVnodes = maxVnodes > TSDB_MAX_VNODES ? TSDB_MAX_VNODES : maxVnodes;
maxVnodes = maxVnodes < TSDB_MIN_VNODES ? TSDB_MIN_VNODES : maxVnodes;
if (pDnode->numOfTotalVnodes == 0) {
pDnode->numOfTotalVnodes = maxVnodes;
}
if (pDnode->alternativeRole == TSDB_DNODE_ROLE_MGMT) {
pDnode->numOfTotalVnodes = 0;
}
pDnode->openVnodes = 0;
pDnode->status = TSDB_DN_STATUS_OFFLINE;
mgmtUpdateDnode(pDnode);
}
bool mgmtCheckModuleInDnode(SDnodeObj *pDnode, int32_t moduleType) {
uint32_t status = pDnode->moduleStatus & (1 << moduleType);
return status > 0;
}
int32_t mgmtGetModuleMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn) {
int32_t cols = 0;
SUserObj *pUser = mgmtGetUserFromConn(pConn, NULL);
if (pUser == NULL) return 0;
if (strcmp(pUser->user, "root") != 0) return TSDB_CODE_NO_RIGHTS;
SSchema *pSchema = pMeta->schema;
pShow->bytes[cols] = 16;
pSchema[cols].type = TSDB_DATA_TYPE_BINARY;
strcpy(pSchema[cols].name, "IP");
pSchema[cols].bytes = htons(pShow->bytes[cols]);
cols++;
pShow->bytes[cols] = 10;
pSchema[cols].type = TSDB_DATA_TYPE_BINARY;
strcpy(pSchema[cols].name, "module type");
pSchema[cols].bytes = htons(pShow->bytes[cols]);
cols++;
pShow->bytes[cols] = 10;
pSchema[cols].type = TSDB_DATA_TYPE_BINARY;
strcpy(pSchema[cols].name, "module status");
pSchema[cols].bytes = htons(pShow->bytes[cols]);
cols++;
pMeta->numOfColumns = htons(cols);
pShow->numOfColumns = cols;
pShow->offset[0] = 0;
for (int32_t i = 1; i < cols; ++i) {
pShow->offset[i] = pShow->offset[i - 1] + pShow->bytes[i - 1];
}
pShow->numOfRows = 0;
SDnodeObj *pDnode = NULL;
while (1) {
pShow->pNode = mgmtGetNextDnode(pShow, (SDnodeObj **)&pDnode);
if (pDnode == NULL) break;
for (int32_t moduleType = 0; moduleType < TSDB_MOD_MAX; ++moduleType) {
if (mgmtCheckModuleInDnode(pDnode, moduleType)) {
pShow->numOfRows++;
}
}
}
pShow->rowSize = pShow->offset[cols - 1] + pShow->bytes[cols - 1];
pShow->pNode = NULL;
return 0;
}
int32_t mgmtRetrieveModules(SShowObj *pShow, char *data, int32_t rows, void *pConn) {
int32_t numOfRows = 0;
SDnodeObj *pDnode = NULL;
char * pWrite;
int32_t cols = 0;
char ipstr[20];
while (numOfRows < rows) {
pShow->pNode = mgmtGetNextDnode(pShow, (SDnodeObj **)&pDnode);
if (pDnode == NULL) break;
for (int32_t moduleType = 0; moduleType < TSDB_MOD_MAX; ++moduleType) {
if (!mgmtCheckModuleInDnode(pDnode, moduleType)) {
continue;
}
cols = 0;
tinet_ntoa(ipstr, pDnode->privateIp);
pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows;
strcpy(pWrite, ipstr);
cols++;
pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows;
strcpy(pWrite, tsModule[moduleType].name);
cols++;
pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows;
strcpy(pWrite, taosGetDnodeStatusStr(pDnode->status) );
cols++;
numOfRows++;
}
}
pShow->numOfReads += numOfRows;
return numOfRows;
}
static int32_t mgmtGetConfigMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn) {
int32_t cols = 0;
SUserObj *pUser = mgmtGetUserFromConn(pConn, NULL);
if (pUser == NULL) return 0;
if (strcmp(pUser->user, "root") != 0) return TSDB_CODE_NO_RIGHTS;
SSchema *pSchema = pMeta->schema;
pShow->bytes[cols] = TSDB_CFG_OPTION_LEN;
pSchema[cols].type = TSDB_DATA_TYPE_BINARY;
strcpy(pSchema[cols].name, "config name");
pSchema[cols].bytes = htons(pShow->bytes[cols]);
cols++;
pShow->bytes[cols] = TSDB_CFG_VALUE_LEN;
pSchema[cols].type = TSDB_DATA_TYPE_BINARY;
strcpy(pSchema[cols].name, "config value");
pSchema[cols].bytes = htons(pShow->bytes[cols]);
cols++;
pMeta->numOfColumns = htons(cols);
pShow->numOfColumns = cols;
pShow->offset[0] = 0;
for (int32_t i = 1; i < cols; ++i) pShow->offset[i] = pShow->offset[i - 1] + pShow->bytes[i - 1];
pShow->numOfRows = 0;
for (int32_t i = tsGlobalConfigNum - 1; i >= 0; --i) {
SGlobalConfig *cfg = tsGlobalConfig + i;
if (!mgmtCheckConfigShow(cfg)) continue;
pShow->numOfRows++;
}
pShow->rowSize = pShow->offset[cols - 1] + pShow->bytes[cols - 1];
pShow->pNode = NULL;
return 0;
}
static int32_t mgmtRetrieveConfigs(SShowObj *pShow, char *data, int32_t rows, void *pConn) {
int32_t numOfRows = 0;
for (int32_t i = tsGlobalConfigNum - 1; i >= 0 && numOfRows < rows; --i) {
SGlobalConfig *cfg = tsGlobalConfig + i;
if (!mgmtCheckConfigShow(cfg)) continue;
char *pWrite;
int32_t cols = 0;
pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows;
snprintf(pWrite, TSDB_CFG_OPTION_LEN, "%s", cfg->option);
cols++;
pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows;
switch (cfg->valType) {
case TSDB_CFG_VTYPE_SHORT:
snprintf(pWrite, TSDB_CFG_VALUE_LEN, "%d", *((int16_t *)cfg->ptr));
numOfRows++;
break;
case TSDB_CFG_VTYPE_INT:
snprintf(pWrite, TSDB_CFG_VALUE_LEN, "%d", *((int32_t *)cfg->ptr));
numOfRows++;
break;
case TSDB_CFG_VTYPE_UINT:
snprintf(pWrite, TSDB_CFG_VALUE_LEN, "%d", *((uint32_t *)cfg->ptr));
numOfRows++;
break;
case TSDB_CFG_VTYPE_FLOAT:
snprintf(pWrite, TSDB_CFG_VALUE_LEN, "%f", *((float *)cfg->ptr));
numOfRows++;
break;
case TSDB_CFG_VTYPE_STRING:
case TSDB_CFG_VTYPE_IPSTR:
case TSDB_CFG_VTYPE_DIRECTORY:
snprintf(pWrite, TSDB_CFG_VALUE_LEN, "%s", (char *)cfg->ptr);
numOfRows++;
break;
default:
break;
}
}
pShow->numOfReads += numOfRows;
return numOfRows;
}
static int32_t mgmtGetVnodeMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn) {
int32_t cols = 0;
SUserObj *pUser = mgmtGetUserFromConn(pConn, NULL);
if (pUser == NULL) return 0;
if (strcmp(pUser->user, "root") != 0) return TSDB_CODE_NO_RIGHTS;
SSchema *pSchema = pMeta->schema;
pShow->bytes[cols] = 4;
pSchema[cols].type = TSDB_DATA_TYPE_INT;
strcpy(pSchema[cols].name, "vnode");
pSchema[cols].bytes = htons(pShow->bytes[cols]);
cols++;
pShow->bytes[cols] = 12;
pSchema[cols].type = TSDB_DATA_TYPE_BINARY;
strcpy(pSchema[cols].name, "status");
pSchema[cols].bytes = htons(pShow->bytes[cols]);
cols++;
pShow->bytes[cols] = 12;
pSchema[cols].type = TSDB_DATA_TYPE_BINARY;
strcpy(pSchema[cols].name, "sync_status");
pSchema[cols].bytes = htons(pShow->bytes[cols]);
cols++;
pMeta->numOfColumns = htons(cols);
pShow->numOfColumns = cols;
pShow->offset[0] = 0;
for (int32_t i = 1; i < cols; ++i) pShow->offset[i] = pShow->offset[i - 1] + pShow->bytes[i - 1];
SDnodeObj *pDnode = NULL;
if (pShow->payloadLen > 0 ) {
uint32_t ip = ip2uint(pShow->payload);
pDnode = mgmtGetDnodeByIp(ip);
if (NULL == pDnode) {
return TSDB_CODE_NODE_OFFLINE;
}
SVnodeLoad* pVnode;
pShow->numOfRows = 0;
for (int32_t i = 0 ; i < TSDB_MAX_VNODES; i++) {
pVnode = &pDnode->vload[i];
if (0 != pVnode->vgId) {
pShow->numOfRows++;
}
}
pShow->pNode = pDnode;
} else {
while (true) {
pShow->pNode = mgmtGetNextDnode(pShow, (SDnodeObj **)&pDnode);
if (pDnode == NULL) break;
pShow->numOfRows += pDnode->openVnodes;
if (0 == pShow->numOfRows) return TSDB_CODE_NODE_OFFLINE;
}
pShow->pNode = NULL;
}
pShow->rowSize = pShow->offset[cols - 1] + pShow->bytes[cols - 1];
return 0;
}
static int32_t mgmtRetrieveVnodes(SShowObj *pShow, char *data, int32_t rows, void *pConn) {
int32_t numOfRows = 0;
SDnodeObj *pDnode = NULL;
char * pWrite;
int32_t cols = 0;
if (0 == rows) return 0;
if (pShow->payloadLen) {
// output the vnodes info of the designated dnode. And output all vnodes of this dnode, instead of rows (max 100)
pDnode = (SDnodeObj *)(pShow->pNode);
if (pDnode != NULL) {
SVnodeLoad* pVnode;
for (int32_t i = 0 ; i < TSDB_MAX_VNODES; i++) {
pVnode = &pDnode->vload[i];
if (0 == pVnode->vgId) {
continue;
}
cols = 0;
pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows;
*(uint32_t *)pWrite = pVnode->vgId;
cols++;
pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows;
strcpy(pWrite, taosGetVnodeStatusStr(pVnode->status));
cols++;
pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows;
strcpy(pWrite, taosGetVnodeSyncStatusStr(pVnode->syncStatus));
cols++;
numOfRows++;
}
}
} else {
// TODO: output all vnodes of all dnodes
numOfRows = 0;
}
pShow->numOfReads += numOfRows;
return numOfRows;
}
int32_t mgmtInitDnodes() {
mgmtAddShellShowMetaHandle(TSDB_MGMT_TABLE_MODULE, mgmtGetModuleMeta);
mgmtAddShellShowRetrieveHandle(TSDB_MGMT_TABLE_MODULE, mgmtRetrieveModules);
mgmtAddShellShowMetaHandle(TSDB_MGMT_TABLE_CONFIGS, mgmtGetConfigMeta);
mgmtAddShellShowRetrieveHandle(TSDB_MGMT_TABLE_CONFIGS, mgmtRetrieveConfigs);
mgmtAddShellShowMetaHandle(TSDB_MGMT_TABLE_VNODES, mgmtGetVnodeMeta);
mgmtAddShellShowRetrieveHandle(TSDB_MGMT_TABLE_VNODES, mgmtRetrieveVnodes);
mgmtAddShellMsgHandle(TSDB_MSG_TYPE_CM_CONFIG_DNODE, mgmtProcessCfgDnodeMsg); mgmtAddShellMsgHandle(TSDB_MSG_TYPE_CM_CONFIG_DNODE, mgmtProcessCfgDnodeMsg);
mgmtAddDClientRspHandle(TSDB_MSG_TYPE_MD_CONFIG_DNODE_RSP, mgmtProcessCfgDnodeMsgRsp); mgmtAddDClientRspHandle(TSDB_MSG_TYPE_MD_CONFIG_DNODE_RSP, mgmtProcessCfgDnodeMsgRsp);
mgmtAddDServerMsgHandle(TSDB_MSG_TYPE_DM_STATUS, mgmtProcessDnodeStatusMsg); mgmtAddDServerMsgHandle(TSDB_MSG_TYPE_DM_STATUS, mgmtProcessDnodeStatusMsg);
if (mgmtInitDnodesFp) { #ifdef _CLUSTER
return mgmtInitDnodesFp(); return clusterInit();
} else { #else
tsDnodeObj.dnodeId = 1; tsDnodeObj.dnodeId = 1;
tsDnodeObj.privateIp = inet_addr(tsPrivateIp); tsDnodeObj.privateIp = inet_addr(tsPrivateIp);
tsDnodeObj.publicIp = inet_addr(tsPublicIp); tsDnodeObj.publicIp = inet_addr(tsPublicIp);
tsDnodeObj.createdTime = taosGetTimestampMs(); tsDnodeObj.createdTime = taosGetTimestampMs();
tsDnodeObj.numOfTotalVnodes = tsNumOfTotalVnodes; tsDnodeObj.numOfTotalVnodes = tsNumOfTotalVnodes;
tsDnodeObj.numOfCores = (uint16_t) tsNumOfCores; tsDnodeObj.status = TSDB_DN_STATUS_OFFLINE;
tsDnodeObj.alternativeRole = TSDB_DNODE_ROLE_ANY; tsDnodeObj.lastReboot = taosGetTimestampSec();
tsDnodeObj.status = TSDB_DN_STATUS_OFFLINE; sprintf(tsDnodeObj.dnodeName, "%d", tsDnodeObj.dnodeId);
tsDnodeObj.lastReboot = taosGetTimestampSec();
sprintf(tsDnodeObj.dnodeName, "%d", tsDnodeObj.dnodeId); tsDnodeObj.moduleStatus |= (1 << TSDB_MOD_MGMT);
mgmtSetDnodeMaxVnodes(&tsDnodeObj); if (tsEnableHttpModule) {
tsDnodeObj.moduleStatus |= (1 << TSDB_MOD_HTTP);
tsDnodeObj.moduleStatus |= (1 << TSDB_MOD_MGMT); }
if (tsEnableHttpModule) { if (tsEnableMonitorModule) {
tsDnodeObj.moduleStatus |= (1 << TSDB_MOD_HTTP); tsDnodeObj.moduleStatus |= (1 << TSDB_MOD_MONITOR);
}
if (tsEnableMonitorModule) {
tsDnodeObj.moduleStatus |= (1 << TSDB_MOD_MONITOR);
}
return 0;
} }
return 0;
#endif
} }
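/*
 * For illustration only (not part of this file): moduleStatus is treated above as a
 * bitmask indexed by the TSDB_MOD_* constants, so whether a module is enabled can be
 * tested with a shift-and-mask. The helper name and the uint32_t width are assumptions.
 */
static int dnodeModuleEnabled(uint32_t moduleStatus, int32_t mod) {
  return (moduleStatus & (1u << mod)) != 0;   /* e.g. mod == TSDB_MOD_HTTP */
}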
void mgmtCleanUpDnodes() {
#ifdef _CLUSTER
  clusterCleanUp();
#endif
}
SDnodeObj *mgmtGetDnode(int32_t dnodeId) {
#ifdef _CLUSTER
  return clusterGetDnode(dnodeId);
#else
  if (dnodeId == 1) {
    return &tsDnodeObj;
  }
  return NULL;
#endif
}
SDnodeObj *mgmtGetDnodeByIp(uint32_t ip) {
#ifdef _CLUSTER
  return clusterGetDnodeByIp(ip);
#else
  return &tsDnodeObj;
#endif
}
int32_t mgmtGetDnodesNum() {
#ifdef _CLUSTER
  return clusterGetDnodesNum();
#else
  return 1;
#endif
}
int32_t mgmtUpdateDnode(SDnodeObj *pDnode) {
if (mgmtUpdateDnodeFp) {
return (*mgmtUpdateDnodeFp)(pDnode);
} else {
return 0;
}
}
void *mgmtGetNextDnode(SShowObj *pShow, SDnodeObj **pDnode) {
if (mgmtGetNextDnodeFp) {
return (*mgmtGetNextDnodeFp)(pShow, pDnode);
} else {
if (*pDnode == NULL) {
*pDnode = &tsDnodeObj;
} else {
*pDnode = NULL;
}
}
return *pDnode;
}
void mgmtSetDnodeUnRemove(SDnodeObj *pDnode) {
if (mgmtSetDnodeUnRemoveFp) {
(*mgmtSetDnodeUnRemoveFp)(pDnode);
}
}
bool mgmtCheckConfigShow(SGlobalConfig *cfg) {
if (!(cfg->cfgType & TSDB_CFG_CTYPE_B_SHOW))
return false;
return true;
}
bool mgmtCheckDnodeInRemoveState(SDnodeObj *pDnode) {
return pDnode->lbStatus == TSDB_DN_LB_STATUS_OFFLINE_REMOVING || pDnode->lbStatus == TSDB_DN_LB_STATE_SHELL_REMOVING;
}
bool mgmtCheckDnodeInOfflineState(SDnodeObj *pDnode) {
return pDnode->status == TSDB_DN_STATUS_OFFLINE;
}
void mgmtProcessCfgDnodeMsg(SQueuedMsg *pMsg) {
@@ -553,14 +175,10 @@ void mgmtProcessDnodeStatusMsg(SRpcMsg *rpcMsg) {
  pDnode->numOfCores = htons(pStatus->numOfCores);
  pDnode->diskAvailable = pStatus->diskAvailable;
  pDnode->alternativeRole = pStatus->alternativeRole;
  pDnode->numOfTotalVnodes = htons(pStatus->numOfTotalVnodes);
  if (pDnode->numOfTotalVnodes == 0) {
    pDnode->numOfTotalVnodes = htons(pStatus->numOfTotalVnodes);
  }
  if (pStatus->dnodeId == 0) {
    mTrace("dnode:%d, first access, privateIp:%s, name:%s, ", pDnode->dnodeId, taosIpStr(pDnode->privateIp), pDnode->dnodeName);
    mgmtSetDnodeMaxVnodes(pDnode);
  }
  int32_t openVnodes = htons(pStatus->openVnodes);
@@ -14,58 +14,18 @@
 */
#define _DEFAULT_SOURCE
#ifndef _GRANT
#include "os.h"
#include "taoserror.h"
#include "mgmtGrant.h"

int32_t grantInit() { return TSDB_CODE_SUCCESS; }
void    grantCleanUp() {}
void    grantParseParameter() { mError("can't parse parameter k"); }
int32_t grantCheck(EGrantType grant) { return TSDB_CODE_SUCCESS; }
void    grantReset(EGrantType grant, uint64_t value) {}
void    grantAdd(EGrantType grant, uint64_t value) {}
void    grantRestore(EGrantType grant, uint64_t value) {}
#endif
int32_t mgmtCheckDbGrant() {
if (mgmtCheckDbGrantFp) {
return (*mgmtCheckDbGrantFp)();
} else {
return 0;
}
}
void mgmtAddTimeSeries(SAcctObj *pAcct, uint32_t timeSeriesNum) {
pAcct->acctInfo.numOfTimeSeries += timeSeriesNum;
if (mgmtAddTimeSeriesFp) {
(*mgmtAddTimeSeriesFp)(timeSeriesNum);
}
}
void mgmtRestoreTimeSeries(SAcctObj *pAcct, uint32_t timeSeriesNum) {
pAcct->acctInfo.numOfTimeSeries -= timeSeriesNum;
if (mgmtRestoreTimeSeriesFp) {
(*mgmtRestoreTimeSeriesFp)(timeSeriesNum);
}
}
int32_t mgmtCheckTimeSeries(uint32_t timeseries) {
if (mgmtCheckTimeSeriesFp) {
return (*mgmtCheckTimeSeriesFp)(timeseries);
} else {
return 0;
}
}
bool mgmtCheckExpired() {
if (mgmtCheckExpiredFp) {
return mgmtCheckExpiredFp();
} else {
return false;
}
}
@@ -25,6 +25,7 @@
#include "mgmtDClient.h"
#include "mgmtDnode.h"
#include "mgmtDServer.h"
#include "mgmtGrant.h"
#include "mgmtMnode.h"
#include "mgmtSdb.h"
#include "mgmtVgroup.h"
@@ -73,11 +74,16 @@ int32_t mgmtStartSystem() {
    return -1;
  }
  if (acctInit() < 0) {
    mError("failed to init accts");
    return -1;
  }
  if (grantInit() < 0) {
    mError("failed to init grants");
    return -1;
  }
  if (mgmtInitUsers() < 0) {
    mError("failed to init users");
    return -1;
@@ -138,6 +144,7 @@ void mgmtStopSystem() {
void mgmtCleanUpSystem() {
  mPrint("starting to clean up mgmt");
  grantCleanUp();
  mgmtCleanupMnodes();
  mgmtCleanupBalance();
  mgmtCleanUpShell();
@@ -148,7 +155,7 @@ void mgmtCleanUpSystem() {
  mgmtCleanUpDbs();
  mgmtCleanUpDnodes();
  mgmtCleanUpUsers();
  acctCleanUp();
  taosTmrCleanUp(tsMgmtTmr);
  mPrint("mgmt is cleaned up");
}
@@ -137,7 +137,7 @@ static void mgmtProcessMsgFromShell(SRpcMsg *rpcMsg) {
    return;
  }
  if (grantCheck(TSDB_GRANT_TIME) != TSDB_CODE_SUCCESS) {
    mgmtSendSimpleResp(rpcMsg->handle, TSDB_CODE_GRANT_EXPIRED);
    return;
  }
@@ -373,12 +373,12 @@ static void mgmtProcessConnectMsg(SQueuedMsg *pMsg) {
    goto connect_over;
  }
  if (grantCheck(TSDB_GRANT_TIME) != TSDB_CODE_SUCCESS) {
    code = TSDB_CODE_GRANT_EXPIRED;
    goto connect_over;
  }
  SAcctObj *pAcct = acctGetAcct(pUser->acct);
  if (pAcct == NULL) {
    code = TSDB_CODE_INVALID_ACCT;
    goto connect_over;
@@ -253,7 +253,7 @@ static int32_t mgmtAddSuperTableTag(SSuperTableObj *pStable, SSchema schema[], i
    return TSDB_CODE_APP_ERROR;
  }
  SAcctObj *pAcct = acctGetAcct(pDb->cfg.acct);
  if (pAcct == NULL) {
    mError("DB: %s does not belong to any account", pDb->name);
    return TSDB_CODE_APP_ERROR;
@@ -293,7 +293,7 @@ static int32_t mgmtDropSuperTableTag(SSuperTableObj *pStable, char *tagName) {
    return TSDB_CODE_APP_ERROR;
  }
  SAcctObj *pAcct = acctGetAcct(pDb->cfg.acct);
  if (pAcct == NULL) {
    mError("DB: %s does not belong to any account", pDb->name);
    return TSDB_CODE_APP_ERROR;
@@ -381,7 +381,7 @@ static int32_t mgmtAddSuperTableColumn(SSuperTableObj *pStable, SSchema schema[]
    return TSDB_CODE_APP_ERROR;
  }
  SAcctObj *pAcct = acctGetAcct(pDb->cfg.acct);
  if (pAcct == NULL) {
    mError("DB: %s does not belong to any account", pDb->name);
    return TSDB_CODE_APP_ERROR;
@@ -420,7 +420,7 @@ static int32_t mgmtDropSuperTableColumnByName(SSuperTableObj *pStable, char *col
    return TSDB_CODE_APP_ERROR;
  }
  SAcctObj *pAcct = acctGetAcct(pDb->cfg.acct);
  if (pAcct == NULL) {
    mError("DB: %s does not belong to any account", pDb->name);
    return TSDB_CODE_APP_ERROR;
@@ -25,10 +25,9 @@
#include "mgmtShell.h"
#include "mgmtUser.h"

void *         tsUserSdb = NULL;
static int32_t tsUserUpdateSize = 0;
static int32_t mgmtCreateUser(SAcctObj *pAcct, char *name, char *pass);
static int32_t mgmtDropUser(SAcctObj *pAcct, char *name);
static int32_t mgmtUpdateUser(SUserObj *pUser);
static int32_t mgmtGetUserMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn);
@@ -45,10 +44,10 @@ static int32_t mgmtUserActionDestroy(SSdbOperDesc *pOper) {
static int32_t mgmtUserActionInsert(SSdbOperDesc *pOper) {
  SUserObj *pUser = pOper->pObj;
  SAcctObj *pAcct = acctGetAcct(pUser->acct);
  if (pAcct != NULL) {
    acctAddUser(pAcct, pUser);
  }
  else {
    mError("user:%s, acct:%s info not exist in sdb", pUser->user, pUser->acct);
@@ -60,9 +59,9 @@ static int32_t mgmtUserActionInsert(SSdbOperDesc *pOper) {
static int32_t mgmtUserActionDelete(SSdbOperDesc *pOper) {
  SUserObj *pUser = pOper->pObj;
  SAcctObj *pAcct = acctGetAcct(pUser->acct);
  acctRemoveUser(pAcct, pUser);
  return TSDB_CODE_SUCCESS;
}
@@ -115,7 +114,7 @@ int32_t mgmtInitUsers() {
    return -1;
  }
  SAcctObj *pAcct = acctGetAcct("root");
  mgmtCreateUser(pAcct, "root", "taosdata");
  mgmtCreateUser(pAcct, "monitor", tsInternalPass);
  mgmtCreateUser(pAcct, "_root", tsInternalPass);
@@ -155,8 +154,8 @@ static int32_t mgmtUpdateUser(SUserObj *pUser) {
  return code;
}
int32_t mgmtCreateUser(SAcctObj *pAcct, char *name, char *pass) {
  int32_t code = acctCheck(pAcct, TSDB_ACCT_USER);
  if (code != 0) {
    return code;
  }
@@ -171,8 +170,8 @@ static int32_t mgmtCreateUser(SAcctObj *pAcct, char *name, char *pass) {
    return TSDB_CODE_USER_ALREADY_EXIST;
  }
  code = grantCheck(TSDB_GRANT_USER);
  if (code != TSDB_CODE_SUCCESS) {
    return code;
  }
@@ -482,3 +481,30 @@ static void mgmtProcessDropUserMsg(SQueuedMsg *pMsg) {
  mgmtSendSimpleResp(pMsg->thandle, code);
}
void mgmtDropAllUsers(SAcctObj *pAcct) {
void *pNode = NULL;
void *pLastNode = NULL;
int32_t numOfUsers = 0;
int32_t acctNameLen = strlen(pAcct->user);
SUserObj *pUser = NULL;
  while (1) {
    pLastNode = pNode;  // remember the previous node so deletion can resume from it
    pNode = sdbFetchRow(tsUserSdb, pNode, (void **)&pUser);
if (pUser == NULL) break;
if (strncmp(pUser->acct, pAcct->user, acctNameLen) == 0) {
SSdbOperDesc oper = {
.type = SDB_OPER_TYPE_LOCAL,
.table = tsUserSdb,
.pObj = pUser,
};
sdbDeleteRow(&oper);
pNode = pLastNode;
numOfUsers++;
continue;
}
}
  mTrace("acct:%s, all %d users are dropped from sdb", pAcct->user, numOfUsers);
}
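/*
 * Note, for illustration only (not part of this file): the strncmp() above performs a
 * prefix match on the account name, so an account whose name is a prefix of another
 * ("abc" vs "abcd") would also match. An exact comparison would look like the sketch
 * below; the helper name is illustrative.
 */
static int acctNameMatches(const char *userAcct, const char *acctName) {
  return strcmp(userAcct, acctName) == 0;   /* exact match instead of prefix match */
}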
@@ -32,7 +32,7 @@
#include "mgmtTable.h"
#include "mgmtVgroup.h"

void *         tsVgroupSdb = NULL;
static int32_t tsVgUpdateSize = 0;
static int32_t mgmtGetVgroupMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn);
@@ -9,25 +9,20 @@ IF ((TD_LINUX_64) OR (TD_LINUX_32 AND TD_ARM))
  AUX_SOURCE_DIRECTORY(src SRC)
  ADD_LIBRARY(tutil ${SRC})
  TARGET_LINK_LIBRARIES(tutil thirdparty pthread os m rt)
  FIND_PATH(ICONV_INCLUDE_EXIST iconv.h /usr/include/ /usr/local/include/)
  IF (ICONV_INCLUDE_EXIST)
    ADD_DEFINITIONS(-DUSE_LIBICONV)
    FIND_PATH(ICONV_LIBRARY_A_EXIST libiconv.a /usr/lib/ /usr/local/lib/ /lib64)
    FIND_PATH(ICONV_LIBRARY_SO_EXIST libiconv.so /usr/lib/ /usr/local/lib/ /lib64)
    IF (ICONV_LIBRARY_A_EXIST OR ICONV_LIBRARY_SO_EXIST)
      MESSAGE(STATUS "Use the installed libiconv library")
      TARGET_LINK_LIBRARIES(tutil iconv)
    ELSE ()
      # libiconv library is already included in GLIBC,
      MESSAGE(STATUS "Use the iconv functions in GLIBC")
    ENDIF ()
  ELSE ()
    MESSAGE(STATUS "Failed to find iconv, use default encoding method")
  ENDIF ()
  ADD_SUBDIRECTORY(tests)
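# For reference only (not part of this build file): USE_LIBICONV switches tutil to the
# iconv character-set conversion path. A minimal C sketch of that standard API, with
# illustrative buffer names and encodings:
#
#   #include <iconv.h>
#   size_t convertToGb(char *in, size_t inLen, char *out, size_t outLen) {
#     iconv_t cd = iconv_open("GB18030", "UTF-8");     /* to-encoding, from-encoding */
#     if (cd == (iconv_t)-1) return (size_t)-1;
#     char  *pin = in, *pout = out;
#     size_t inLeft = inLen, outLeft = outLen;
#     size_t rc = iconv(cd, &pin, &inLeft, &pout, &outLeft);
#     iconv_close(cd);
#     return (rc == (size_t)-1) ? (size_t)-1 : (outLen - outLeft);  /* bytes produced */
#   }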
@@ -81,7 +81,7 @@ float tsRatioOfQueryThreads = 0.5;
char  tsPublicIp[TSDB_IPv4ADDR_LEN] = {0};
char  tsPrivateIp[TSDB_IPv4ADDR_LEN] = {0};
short tsNumOfVnodesPerCore = 8;
short tsNumOfTotalVnodes = TSDB_INVALID_VNODE_NUM;
short tsCheckHeaderFile = 0;
#ifdef _TD_ARM_32_
@@ -189,7 +189,7 @@ int tsEnableCoreFile = 0;
int tsAnyIp = 1;
uint32_t tsPublicIpInt = 0;
#ifdef _CLUSTER
int tsIsCluster = 1;
#else
int tsIsCluster = 0;
@@ -946,7 +946,7 @@ bool tsReadGlobalConfig() {
  if (tsSecondIp[0] == 0) {
    strcpy(tsSecondIp, tsMasterIp);
  }
  taosGetSystemInfo();
  tsSetLocale();
@@ -960,6 +960,12 @@ bool tsReadGlobalConfig() {
    tsNumOfCores = 1;
  }
  if (tsNumOfTotalVnodes == TSDB_INVALID_VNODE_NUM) {
    tsNumOfTotalVnodes = tsNumOfCores * tsNumOfVnodesPerCore;
    tsNumOfTotalVnodes = tsNumOfTotalVnodes > TSDB_MAX_VNODES ? TSDB_MAX_VNODES : tsNumOfTotalVnodes;
    tsNumOfTotalVnodes = tsNumOfTotalVnodes < TSDB_MIN_VNODES ? TSDB_MIN_VNODES : tsNumOfTotalVnodes;
  }
  if (strlen(tsPrivateIp) == 0) {
    pError("privateIp is null");
    return false;
@@ -1052,12 +1058,12 @@ void tsPrintGlobalConfig() {
    if (tscEmbedded == 0 && !(cfg->cfgType & TSDB_CFG_CTYPE_B_CLIENT)) continue;
    if (cfg->cfgType & TSDB_CFG_CTYPE_B_NOT_PRINT) continue;
    if (cfg->cfgType & TSDB_CFG_CTYPE_B_LITE) {
#ifdef _CLUSTER
      continue;
#endif
    }
    if (cfg->cfgType & TSDB_CFG_CTYPE_B_CLUSTER) {
#ifndef _CLUSTER
      continue;
#endif
    }
char version[64] = "2.0.0.0";
char compatible_version[64] = "2.0.0.0";
char gitinfo[128] = "3264067e97300c84caa61ac909d548c9ca56de6b";
char gitinfoOfInternal[128] = "da88f4a2474737d1f9c76adcf0ff7fd0975e7342";
char buildinfo[512] = "Built by root at 2020-04-01 14:38";
void libtaos_1_6_5_4_Linux_x64() {};
CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
PROJECT(TDengine)
IF ((TD_LINUX_64) OR (TD_LINUX_32 AND TD_ARM))
INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/inc)
INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/dnode/inc)
INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/mnode/detail/inc)
INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/vnode/detail/inc)
INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/client/inc)
INCLUDE_DIRECTORIES(${TD_OS_DIR}/inc)
INCLUDE_DIRECTORIES(inc)
AUX_SOURCE_DIRECTORY(./src SRC)
LIST(REMOVE_ITEM SRC ./src/vnodeFileUtil.c)
LIST(REMOVE_ITEM SRC ./src/taosGrant.c)
ADD_LIBRARY(vnode ${SRC})
IF (TD_CLUSTER)
TARGET_LINK_LIBRARIES(vnode vcluster)
ELSEIF (TD_LITE)
TARGET_LINK_LIBRARIES(vnode vlite)
ENDIF ()
ENDIF ()
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_VNODE_H
#define TDENGINE_VNODE_H
#ifdef __cplusplus
extern "C" {
#endif
#include "os.h"
#include "tglobalcfg.h"
#include "tidpool.h"
#include "tlog.h"
#include "tmempool.h"
#include "trpc.h"
#include "tsclient.h"
#include "taosdef.h"
#include "tsocket.h"
#include "ttime.h"
#include "ttimer.h"
#include "tutil.h"
#include "vnodeCache.h"
#include "vnodeFile.h"
#include "vnodePeer.h"
#include "vnodeShell.h"
#define TSDB_FILE_HEADER_LEN 512
#define TSDB_FILE_HEADER_VERSION_SIZE 32
#define TSDB_CACHE_POS_BITS 13
#define TSDB_CACHE_POS_MASK 0x1FFF
#define TSDB_ACTION_INSERT 0
#define TSDB_ACTION_IMPORT 1
#define TSDB_ACTION_DELETE 2
#define TSDB_ACTION_UPDATE 3
#define TSDB_ACTION_MAX 4
enum _data_source {
TSDB_DATA_SOURCE_METER,
TSDB_DATA_SOURCE_VNODE,
TSDB_DATA_SOURCE_SHELL,
TSDB_DATA_SOURCE_QUEUE,
TSDB_DATA_SOURCE_LOG,
};
enum _sync_cmd {
TSDB_SYNC_CMD_FILE,
TSDB_SYNC_CMD_CACHE,
TSDB_SYNC_CMD_CREATE,
TSDB_SYNC_CMD_REMOVE,
};
typedef struct {
int64_t offset : 48;
int64_t length : 16;
} SMeterObjHeader;
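/*
 * For illustration only (not part of this header): SMeterObjHeader packs a 48-bit file
 * offset and a 16-bit length into a single 64-bit word, so an entry can be built as in
 * the sketch below (the helper name is illustrative).
 */
static inline SMeterObjHeader packMeterObjHeader(int64_t offset, int64_t length) {
  SMeterObjHeader hdr;
  hdr.offset = offset;   /* silently truncated to 48 bits by the bit-field */
  hdr.length = length;   /* silently truncated to 16 bits by the bit-field */
  return hdr;
}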
typedef struct {
int64_t len;
char data[];
} SData;
#pragma pack(push, 8)
typedef struct {
SVnodeStatisticInfo vnodeStatistic;
int vnode;
SVnodeCfg cfg;
// SDiskDesc tierDisk[TSDB_MAX_TIER];
SVPeerDesc vpeers[TSDB_VNODES_SUPPORT];
SVnodePeer * peerInfo[TSDB_VNODES_SUPPORT];
char selfIndex;
char vnodeStatus;
char accessState; // Vnode access state, Readable/Writable
char syncStatus;
char commitInProcess;
pthread_t commitThread;
TSKEY firstKey; // minimum key uncommitted, it may be smaller than
// commitFirstKey
TSKEY commitFirstKey; // minimum key for a commit file, it shall be
// xxxx00000, calculated from fileId
  TSKEY commitLastKey; // maximum key for a commit file, it shall be xxxx99999,
                       // calculated from fileId
int commitFileId;
TSKEY lastCreate;
TSKEY lastRemove;
TSKEY lastKey; // last key for the whole vnode, updated by every insert
// operation
uint64_t version;
int streamRole;
int numOfStreams;
void *streamTimer;
  TSKEY lastKeyOnFile; // maximum key on the last file, it shall be xxxx99999
int fileId;
int badFileId;
int numOfFiles;
int maxFiles;
int maxFile1;
int maxFile2;
int nfd; // temp head file FD
int hfd; // head file FD
int lfd; // last file FD
int tfd; // temp last file FD
int dfd; // data file FD
int64_t dfSize;
int64_t lfSize;
uint64_t * fmagic; // hold magic number for each file
char cfn[TSDB_FILENAME_LEN];
char nfn[TSDB_FILENAME_LEN];
char lfn[TSDB_FILENAME_LEN]; // last file name
char tfn[TSDB_FILENAME_LEN]; // temp last file name
pthread_mutex_t vmutex;
int logFd;
char * pMem;
char * pWrite;
pthread_mutex_t logMutex;
char logFn[TSDB_FILENAME_LEN];
char logOFn[TSDB_FILENAME_LEN];
int64_t mappingSize;
int64_t mappingThreshold;
void * commitTimer;
void ** meterList;
void * pCachePool;
void * pQueue;
pthread_t thread;
int peersOnline;
int shellConns;
int meterConns;
struct _qinfo *pQInfoList;
TAOS * dbConn;
SMeterObjHeader *meterIndex;
} SVnodeObj;
#pragma pack(pop)
typedef struct SColumn {
short colId;
short bytes;
char type;
} SColumn;
typedef struct _meter_obj {
uint64_t uid;
char meterId[TSDB_TABLE_ID_LEN];
int sid;
short vnode;
short numOfColumns;
short bytesPerPoint;
short maxBytes;
int32_t pointsPerBlock;
int32_t pointsPerFileBlock;
int freePoints;
TSKEY lastKey; // updated by insert operation
TSKEY lastKeyOnFile; // last key on file, updated by commit action
TSKEY timeStamp; // delete or added time
uint64_t commitCount;
int32_t sversion;
short sqlLen;
char searchAlgorithm : 4;
char compAlgorithm : 4;
char status; // 0: ok, 1: stop stream computing
char reserved[16];
int state;
int numOfQueries;
char * pSql;
void * pStream;
void * pCache;
SColumn *schema;
} SMeterObj;
typedef struct {
char type;
char pversion; // protocol version
char action; // insert, import, delete, update
int32_t sversion; // only for insert
int32_t sid;
int32_t len;
uint64_t lastVersion; // latest version
char cont[];
} SVMsgHeader;
struct tSQLBinaryExpr;
typedef struct SColumnInfoEx {
SColumnInfo data;
int16_t colIdx;
int16_t colIdxInBuf;
/*
   * 0: denotes if it is required in the first round of scan of the data block
   * 1: denotes if it is required in the secondary scan
*/
int16_t req[2];
} SColumnInfoEx;
struct SColumnFilterElem;
typedef bool (*__filter_func_t)(struct SColumnFilterElem *pFilter, char *val1, char *val2);
typedef struct SColumnFilterElem {
int16_t bytes; // column length
__filter_func_t fp;
SColumnFilterInfo filterInfo;
} SColumnFilterElem;
typedef struct SSingleColumnFilterInfo {
SColumnInfoEx info;
int32_t numOfFilters;
SColumnFilterElem *pFilters;
char * pData;
} SSingleColumnFilterInfo;
typedef struct SQuery {
short numOfCols;
SOrderVal order;
char keyIsMet; // if key is met, it will be set
char over;
int fileId; // only for query in file
int hfd; // only for query in file, head file handle
int dfd; // only for query in file, data file handle
int lfd; // only for query in file, last file handle
SCompBlock *pBlock; // only for query in file
SField ** pFields;
int numOfBlocks; // only for query in file
int blockBufferSize; // length of pBlock buffer
int currentSlot;
int firstSlot;
/*
* the two parameters are utilized to handle the data missing situation, caused by import operation.
* When the commit slot is the first slot, and commitPoints != 0
*/
int32_t commitSlot; // which slot is committed,
int32_t commitPoint; // starting point for next commit
int slot;
int pos;
TSKEY key;
int compBlockLen; // only for import
int64_t blockId;
TSKEY skey;
TSKEY ekey;
int64_t intervalTime;
int64_t slidingTime; // sliding time for sliding window query
char intervalTimeUnit; // interval data type, used for daytime revise
int8_t precision;
int16_t numOfOutputCols;
int16_t interpoType;
int16_t checkBufferInLoop; // check if the buffer is full during scan each block
SLimitVal limit;
int32_t rowSize;
SSqlGroupbyExpr * pGroupbyExpr;
SSqlFunctionExpr * pSelectExpr;
SColumnInfoEx * colList;
int32_t numOfFilterCols;
SSingleColumnFilterInfo *pFilterInfo;
int64_t * defaultVal;
TSKEY lastKey;
// buffer info
int64_t pointsRead; // the number of points returned
int64_t pointsToRead; // maximum number of points to read
int64_t pointsOffset; // the number of points offset to save read data
SData **sdata;
SData * tsData; // timestamp column/primary key column
} SQuery;
typedef struct {
char spi;
char encrypt;
char secret[TSDB_KEY_LEN];
char cipheringKey[TSDB_KEY_LEN];
} SConnSec;
typedef struct {
char * buffer;
char * offset;
int trans;
int bufferSize;
pthread_mutex_t qmutex;
} STranQueue;
// internal globals
extern int tsMeterSizeOnFile;
extern void * tsQueryQhandle;
extern int tsVnodePeers;
extern int tsMaxVnode;
extern int tsMaxQueues;
extern int tsOpenVnodes;
extern SVnodeObj *vnodeList;
extern void * vnodeTmrCtrl;
// read API
extern int (*vnodeSearchKeyFunc[])(char *pValue, int num, TSKEY key, int order);
void *vnodeQueryOnSingleTable(SMeterObj **pMeterObj, SSqlGroupbyExpr *pGroupbyExpr, SSqlFunctionExpr *sqlExprs,
SQueryMeterMsg *pQueryMsg, int *code);
void *vnodeQueryOnMultiMeters(SMeterObj **pMeterObj, SSqlGroupbyExpr *pGroupbyExpr, SSqlFunctionExpr *pSqlExprs,
SQueryMeterMsg *pQueryMsg, int *code);
// assistant/tool functions
SSqlGroupbyExpr *vnodeCreateGroupbyExpr(SQueryMeterMsg *pQuery, int32_t *code);
SSqlFunctionExpr *vnodeCreateSqlFunctionExpr(SQueryMeterMsg *pQuery, int32_t *code);
bool vnodeValidateExprColumnInfo(SQueryMeterMsg *pQueryMsg, SSqlFuncExprMsg *pExprMsg);
bool vnodeIsValidVnodeCfg(SVnodeCfg *pCfg);
int32_t vnodeGetResultSize(void *handle, int32_t *numOfRows);
int32_t vnodeCopyQueryResultToMsg(void *handle, char *data, int32_t numOfRows);
int64_t vnodeGetOffsetVal(void *thandle);
bool vnodeHasRemainResults(void *handle);
int vnodeRetrieveQueryResult(void *handle, int *pNum, char *argv[]);
int vnodeSaveQueryResult(void *handle, char *data, int32_t* size);
int vnodeRetrieveQueryInfo(void *handle, int *numOfRows, int *rowSize, int16_t *timePrec);
void vnodeFreeQInfo(void *, bool);
void vnodeFreeQInfoInQueue(void *param);
bool vnodeIsQInfoValid(void *param);
void vnodeDecRefCount(void *param);
void vnodeAddRefCount(void *param);
int32_t vnodeConvertQueryMeterMsg(SQueryMeterMsg *pQuery);
void vnodeQueryData(SSchedMsg *pMsg);
// meter API
int vnodeOpenMetersVnode(int vnode);
void vnodeCloseMetersVnode(int vnode);
int vnodeCreateMeterObj(SMeterObj *pNew, SConnSec *pSec);
int vnodeRemoveMeterObj(int vnode, int sid);
int vnodeInsertPoints(SMeterObj *pObj, char *cont, int contLen, char source, void *, int sversion, int *numOfPoints, TSKEY now);
int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, void *, int sversion, int *numOfPoints, TSKEY now);
int vnodeInsertBufferedPoints(int vnode);
int vnodeSaveAllMeterObjToFile(int vnode);
int vnodeSaveMeterObjToFile(SMeterObj *pObj);
int vnodeSaveVnodeCfg(int vnode, SVnodeCfg *pCfg, SVPeerDesc *pDesc);
int vnodeSaveVnodeInfo(int vnode);
// cache API
void *vnodeOpenCachePool(int vnode);
void vnodeCloseCachePool(int vnode);
void *vnodeAllocateCacheInfo(SMeterObj *pObj);
void vnodeFreeCacheInfo(SMeterObj *pObj);
void vnodeSetCommitQuery(SMeterObj *pObj, SQuery *pQuery);
int vnodeInsertPointToCache(SMeterObj *pObj, char *pData);
int vnodeQueryFromCache(SMeterObj *pObj, SQuery *pQuery);
uint64_t vnodeGetPoolCount(SVnodeObj *pVnode);
void vnodeUpdateCommitInfo(SMeterObj *pObj, int slot, int pos, uint64_t count);
void vnodeCommitOver(SVnodeObj *pVnode);
TSKEY vnodeGetFirstKey(int vnode);
int vnodeSyncRetrieveCache(int vnode, int fd);
int vnodeSyncRestoreCache(int vnode, int fd);
pthread_t vnodeCreateCommitThread(SVnodeObj *pVnode);
void vnodeCancelCommit(SVnodeObj *pVnode);
void vnodeCloseStream(SVnodeObj *pVnode);
void vnodeProcessCommitTimer(void *param, void *tmrId);
void vnodeSearchPointInCache(SMeterObj *pObj, SQuery *pQuery);
int vnodeAllocateCacheBlock(SMeterObj *pObj);
int vnodeFreeCacheBlock(SCacheBlock *pCacheBlock);
int vnodeIsCacheCommitted(SMeterObj *pObj);
// file API
int vnodeInitFile(int vnode);
int vnodeQueryFromFile(SMeterObj *pObj, SQuery *pQuery);
void *vnodeCommitToFile(void *param);
void *vnodeCommitMultiToFile(SVnodeObj *pVnode, int ssid, int esid);
int vnodeSyncRetrieveFile(int vnode, int fd, uint32_t fileId, uint64_t *fmagic);
int vnodeSyncRestoreFile(int vnode, int sfd);
int vnodeWriteBlockToFile(SMeterObj *pObj, SCompBlock *pBlock, SData *data[], SData *cdata[], int pointsRead);
int vnodeSearchPointInFile(SMeterObj *pObj, SQuery *pQuery);
int vnodeReadCompBlockToMem(SMeterObj *pObj, SQuery *pQuery, SData *sdata[]);
int vnodeOpenCommitFiles(SVnodeObj *pVnode, int noTempLast);
void vnodeCloseCommitFiles(SVnodeObj *pVnode);
int vnodeReadLastBlockToMem(SMeterObj *pObj, SCompBlock *pBlock, SData *sdata[]);
// vnode API
void vnodeUpdateStreamRole(SVnodeObj *pVnode);
int vnodeInitPeer(int numOfThreads);
void vnodeCleanUpPeer();
int vnodeOpenPeerVnode(int vnode);
void vnodeClosePeerVnode(int vnode);
void *vnodeGetMeterPeerConnection(SMeterObj *pObj, int index);
int vnodeForwardToPeer(SMeterObj *pObj, char *msg, int msgLen, char action, int sversion);
void vnodeCloseAllSyncFds(int vnode);
void vnodeConfigVPeers(int vnode, int numOfPeers, SVPeerDesc peerDesc[]);
void vnodeStartSyncProcess(SVnodeObj *pVnode);
void vnodeCancelSync(int vnode);
void vnodeListPeerStatus(char *buffer);
void vnodeCheckOwnStatus(SVnodeObj *pVnode);
int vnodeSaveMeterObjToFile(SMeterObj *pObj);
int vnodeRecoverFromPeer(SVnodeObj *pVnode, int fileId);
// vnodes API
int vnodeInitVnodes();
int vnodeInitStore();
void vnodeCleanUpVnodes();
int vnodeRemoveVnode(int vnode);
int vnodeCreateVnode(int vnode, SVnodeCfg *pCfg, SVPeerDesc *pDesc);
void vnodeOpenStreams(void *param, void *tmrId);
void vnodeCreateStream(SMeterObj *pObj);
void vnodeRemoveStream(SMeterObj *pObj);
// shell API
int vnodeInitShell();
void vnodeCleanUpShell();
int vnodeOpenShellVnode(int vnode);
void vnodeCloseShellVnode(int vnode);
// memter mgmt
int vnodeInitMeterMgmt();
void vnodeCleanUpMeterMgmt();
int vnodeOpenMeterMgmtVnode(int vnode);
int vnodeOpenMeterMgmtStoreVnode(int vnode);
void vnodeCloseMeterMgmtVnode(int vnode);
int vnodeCreateMeterMgmt(SMeterObj *pObj, SConnSec *pSec);
void vnodeRemoveMeterMgmt(SMeterObj *pObj);
SConnSec *vnodeGetMeterSec(int vnode, int sid);
int vnodeCreateMeterObjFile(int vnode);
// mgmt
void vnodeCleanUpMgmt();
int vnodeRetrieveMissedCreateMsg(int vnode, int fd, uint64_t stime);
int vnodeRestoreMissedCreateMsg(int vnode, int fd);
int vnodeRetrieveMissedRemoveMsg(int vid, int fd, uint64_t stime);
int vnodeRestoreMissedRemoveMsg(int vnode, int fd);
int vnodeProcessBufferedCreateMsgs(int vnode);
void vnodeSendVpeerCfgMsg(int vnode);
int vnodeSendMeterCfgMsg(int vnode, int sid);
int vnodeMgmtConns();
void vnodeRemoveFile(int vnode, int fileId);
// commit
int vnodeInitCommit(int vnode);
void vnodeCleanUpCommit(int vnode);
int vnodeRenewCommitLog(int vnode);
void vnodeRemoveCommitLog(int vnode);
int vnodeWriteToCommitLog(SMeterObj *pObj, char action, char *cont, int contLen, int sversion);
extern int (*vnodeProcessAction[])(SMeterObj *, char *, int, char, void *, int, int *, TSKEY);
extern int (*pCompFunc[])(const char *const input, int inputSize, const int elements, char *const output,
int outputSize, char algorithm, char *const buffer, int bufferSize);
extern int (*pDecompFunc[])(const char *const input, int compressedSize, const int elements, char *const output,
int outputSize, char algorithm, char *const buffer, int bufferSize);
// global variable and APIs provided by mgmt
extern char mgmtStatus;
extern char tsMgmtDirectory[];
extern const int16_t vnodeFileVersion;
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_VNODE_H
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_VNODECACHE_H
#define TDENGINE_VNODECACHE_H
#ifdef __cplusplus
extern "C" {
#endif
typedef struct {
short notFree;
short numOfPoints;
int slot;
int index;
int64_t blockId;
struct _meter_obj *pMeterObj;
char * offset[];
} SCacheBlock;
typedef struct {
int64_t blocks;
int maxBlocks;
int numOfBlocks;
int unCommittedBlocks;
int32_t currentSlot;
int32_t commitSlot; // which slot is committed
int32_t commitPoint; // starting point for next commit
SCacheBlock **cacheBlocks; // cache block list, circular list
} SCacheInfo;
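/*
 * For illustration only (not part of this header): cacheBlocks is documented above as a
 * circular list, so advancing the write position wraps around at maxBlocks. A minimal
 * sketch of that wrap-around step (the helper name is illustrative):
 */
static inline int32_t nextCacheSlot(int32_t currentSlot, int32_t maxBlocks) {
  return (currentSlot + 1) % maxBlocks;   /* wrap back to slot 0 after the last block */
}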
typedef struct {
int vnode;
char ** pMem;
int64_t freeSlot;
pthread_mutex_t vmutex;
uint64_t count; // kind of transcation ID
int64_t notFreeSlots;
int64_t threshold;
char commitInProcess;
int cacheBlockSize;
int cacheNumOfBlocks;
} SCachePool;
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_VNODECACHE_H
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_VNODEDATAFILTERFUNC_H
#define TDENGINE_VNODEDATAFILTERFUNC_H
#ifdef __cplusplus
extern "C" {
#endif
#include "vnode.h"
__filter_func_t *vnodeGetRangeFilterFuncArray(int32_t type);
__filter_func_t *vnodeGetValueFilterFuncArray(int32_t type);
bool vnodeSupportPrefilter(int32_t type);
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_VNODEDATAFILTERFUNC_H
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_VNODEFILE_H
#define TDENGINE_VNODEFILE_H
#ifdef __cplusplus
extern "C" {
#endif
#include "tchecksum.h"
#define TSDB_VNODE_DELIMITER 0xF00AFA0F
typedef struct { int64_t compInfoOffset; } SCompHeader;
typedef struct {
short colId;
short bytes;
int32_t numOfNullPoints;
int32_t type : 8;
int32_t offset : 24;
int32_t len; // data length
int64_t sum;
int64_t max;
int64_t min;
int16_t maxIndex;
int16_t minIndex;
char reserved[20];
} SField;
typedef struct {
int64_t last : 1;
int64_t offset : 63;
int32_t algorithm : 8; // compression algorithm can be changed
int32_t numOfPoints : 24; // how many points have been written into this block
int32_t sversion;
int32_t len; // total length of this data block
uint16_t numOfCols;
char reserved[16];
TSKEY keyFirst; // time stamp for the first point
TSKEY keyLast; // time stamp for the last point
} SCompBlock;
typedef struct {
SCompBlock *compBlock;
SField * fields;
} SCompBlockFields;
typedef struct {
uint64_t uid;
int64_t last : 1;
int64_t numOfBlocks : 62;
uint32_t delimiter; // delimiter for recovery
TSCKSUM checksum;
SCompBlock compBlocks[]; // comp block list
} SCompInfo;
typedef struct {
int64_t tempHeadOffset;
int64_t compInfoOffset;
int64_t oldCompBlockOffset;
int64_t oldNumOfBlocks;
int64_t newNumOfBlocks;
int64_t finalNumOfBlocks;
int64_t oldCompBlockLen;
int64_t newCompBlockLen;
int64_t finalCompBlockLen;
int64_t committedPoints;
int commitSlot;
int32_t last : 1;
int32_t changed : 1;
int32_t commitPos : 30;
int64_t commitCount;
SCompBlock lastBlock;
} SMeterInfo;
typedef struct { int64_t totalStorage; } SVnodeHeadInfo;
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_VNODEFILE_H
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_VNODEQUERYIMPL_H
#define TDENGINE_VNODEQUERYIMPL_H
#ifdef __cplusplus
extern "C" {
#endif
#include "os.h"
#include "hash.h"
#include "hashfunc.h"
#define GET_QINFO_ADDR(x) ((char*)(x)-offsetof(SQInfo, query))
#define Q_STATUS_EQUAL(p, s) (((p) & (s)) != 0)
/*
 * the output buffer page size is set to 16k
* The page size should be sufficient for at least one output result or intermediate result.
* Some intermediate results may be extremely large, such as top/bottom(100) query.
*/
#define DEFAULT_INTERN_BUF_SIZE 16384L
#define INIT_ALLOCATE_DISK_PAGES 60L
#define DEFAULT_DATA_FILE_MAPPING_PAGES 2L
#define DEFAULT_DATA_FILE_MMAP_WINDOW_SIZE (DEFAULT_DATA_FILE_MAPPING_PAGES * DEFAULT_INTERN_BUF_SIZE)
#define IO_ENGINE_MMAP 0
#define IO_ENGINE_SYNC 1
#define DEFAULT_IO_ENGINE IO_ENGINE_SYNC
/**
 * check if the primary column is loaded by default; otherwise, the program will be
 * forced to load the primary column explicitly.
*/
#define PRIMARY_TSCOL_LOADED(query) ((query)->colList[0].data.colId == PRIMARYKEY_TIMESTAMP_COL_INDEX)
typedef enum {
/*
* the program will call this function again, if this status is set.
* used to transfer from QUERY_RESBUF_FULL
*/
QUERY_NOT_COMPLETED = 0x1u,
/*
   * output buffer is full, so the next query will be employed;
   * in this case, we need to set the appropriate start scan point for
   * the next query.
   *
   * this status only exists in the group-by clause and
   * diff/add/division/multiply queries.
*/
QUERY_RESBUF_FULL = 0x2u,
/*
* query is over
* 1. this status is used in one row result query process, e.g.,
* count/sum/first/last/
* avg...etc.
   * 2. when the query range on timestamp is satisfied, it is also denoted as
   *    query_completed
*/
QUERY_COMPLETED = 0x4u,
/*
* all data has been scanned, so current search is stopped,
* At last, the function will transfer this status to QUERY_COMPLETED
*/
QUERY_NO_DATA_TO_CHECK = 0x8u,
} vnodeQueryStatus;
typedef struct SPointInterpoSupporter {
int32_t numOfCols;
char** pPrevPoint;
char** pNextPoint;
} SPointInterpoSupporter;
typedef struct SBlockInfo {
TSKEY keyFirst;
TSKEY keyLast;
int32_t numOfCols;
int32_t size;
} SBlockInfo;
typedef struct SMeterDataBlockInfoEx {
SCompBlockFields pBlock;
SMeterDataInfo* pMeterDataInfo;
int32_t blockIndex;
int32_t groupIdx; /* number of group is less than the total number of meters */
} SMeterDataBlockInfoEx;
typedef enum {
DISK_DATA_LOAD_FAILED = -0x1,
DISK_DATA_LOADED = 0x0,
DISK_DATA_DISCARDED = 0x01,
} vnodeDiskLoadStatus;
#define IS_MASTER_SCAN(runtime) (((runtime)->scanFlag & 1u) == MASTER_SCAN)
#define IS_SUPPLEMENT_SCAN(runtime) ((runtime)->scanFlag == SUPPLEMENTARY_SCAN)
#define SET_SUPPLEMENT_SCAN_FLAG(runtime) ((runtime)->scanFlag = SUPPLEMENTARY_SCAN)
#define SET_MASTER_SCAN_FLAG(runtime) ((runtime)->scanFlag = MASTER_SCAN)
typedef int (*__block_search_fn_t)(char* data, int num, int64_t key, int order);
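/*
 * For illustration only (not part of this header): a __block_search_fn_t locates a key
 * inside a block's primary-key column. A minimal binary search over TSKEY values is
 * sketched below; the real implementations live in vnodeSearchKeyFunc[] and also handle
 * order-dependent nearest-position semantics, which this sketch omits.
 */
static int exampleBinarySearchKey(char* data, int num, int64_t key, int order) {
  (void)order;                       /* ascending data assumed in this sketch */
  TSKEY* keyList = (TSKEY*)data;
  int    lo = 0, hi = num - 1;
  while (lo <= hi) {
    int mid = lo + (hi - lo) / 2;
    if (keyList[mid] == key) return mid;
    if (keyList[mid] < key) lo = mid + 1;
    else hi = mid - 1;
  }
  return -1;                         /* not found */
}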
static FORCE_INLINE SMeterObj* getMeterObj(void* hashHandle, int32_t sid) {
return *(SMeterObj**)taosHashGet(hashHandle, (const char*)&sid, sizeof(sid));
}
bool isQueryKilled(SQuery* pQuery);
bool isFixedOutputQuery(SQuery* pQuery);
bool isPointInterpoQuery(SQuery* pQuery);
bool isSumAvgRateQuery(SQuery *pQuery);
bool isTopBottomQuery(SQuery* pQuery);
bool isFirstLastRowQuery(SQuery* pQuery);
bool isTSCompQuery(SQuery* pQuery);
bool notHasQueryTimeRange(SQuery* pQuery);
bool needSupplementaryScan(SQuery* pQuery);
bool onDemandLoadDatablock(SQuery* pQuery, int16_t queryRangeSet);
void setQueryStatus(SQuery* pQuery, int8_t status);
bool doRevisedResultsByLimit(SQInfo* pQInfo);
void truncateResultByLimit(SQInfo* pQInfo, int64_t* final, int32_t* interpo);
void initCtxOutputBuf(SQueryRuntimeEnv* pRuntimeEnv);
void resetCtxOutputBuf(SQueryRuntimeEnv* pRuntimeEnv);
void forwardCtxOutputBuf(SQueryRuntimeEnv* pRuntimeEnv, int64_t output);
bool needPrimaryTimestampCol(SQuery* pQuery, SBlockInfo* pBlockInfo);
void vnodeScanAllData(SQueryRuntimeEnv* pRuntimeEnv);
int32_t vnodeQueryResultInterpolate(SQInfo* pQInfo, tFilePage** pDst, tFilePage** pDataSrc, int32_t numOfRows,
int32_t* numOfInterpo);
void copyResToQueryResultBuf(STableQuerySupportObj* pSupporter, SQuery* pQuery);
void doSkipResults(SQueryRuntimeEnv* pRuntimeEnv);
void doFinalizeResult(SQueryRuntimeEnv* pRuntimeEnv);
int64_t getNumOfResult(SQueryRuntimeEnv* pRuntimeEnv);
void forwardQueryStartPosition(SQueryRuntimeEnv* pRuntimeEnv);
bool normalizedFirstQueryRange(bool dataInDisk, bool dataInCache, STableQuerySupportObj* pSupporter,
SPointInterpoSupporter* pPointInterpSupporter, int64_t* key);
void pointInterpSupporterInit(SQuery* pQuery, SPointInterpoSupporter* pInterpoSupport);
void pointInterpSupporterDestroy(SPointInterpoSupporter* pPointInterpSupport);
void pointInterpSupporterSetData(SQInfo* pQInfo, SPointInterpoSupporter* pPointInterpSupport);
int64_t loadRequiredBlockIntoMem(SQueryRuntimeEnv* pRuntimeEnv, SPositionInfo* position);
void disableFunctForSuppleScan(STableQuerySupportObj* pSupporter, int32_t order);
void enableFunctForMasterScan(SQueryRuntimeEnv* pRuntimeEnv, int32_t order);
int32_t mergeMetersResultToOneGroups(STableQuerySupportObj* pSupporter);
void copyFromWindowResToSData(SQInfo* pQInfo, SWindowResult* result);
SBlockInfo getBlockInfo(SQueryRuntimeEnv *pRuntimeEnv);
SBlockInfo getBlockBasicInfo(SQueryRuntimeEnv *pRuntimeEnv, void* pBlock, int32_t type);
SCacheBlock* getCacheDataBlock(SMeterObj* pMeterObj, SQueryRuntimeEnv* pRuntimeEnv, int32_t slot);
void stableApplyFunctionsOnBlock(STableQuerySupportObj* pSupporter, SMeterDataInfo* pMeterDataInfo,
SBlockInfo* pBlockInfo, SField* pFields, __block_search_fn_t searchFn);
int32_t vnodeFilterQualifiedMeters(SQInfo* pQInfo, int32_t vid, tSidSet* pSidSet, SMeterDataInfo* pMeterDataInfo,
int32_t* numOfMeters, SMeterDataInfo*** pReqMeterDataInfo);
int32_t vnodeGetVnodeHeaderFileIndex(int32_t* fid, SQueryRuntimeEnv* pRuntimeEnv, int32_t order);
int32_t createDataBlocksInfoEx(SMeterDataInfo** pMeterDataInfo, int32_t numOfMeters,
SMeterDataBlockInfoEx** pDataBlockInfoEx, int32_t numOfCompBlocks,
int32_t* nAllocBlocksInfoSize, int64_t addr);
void freeMeterBlockInfoEx(SMeterDataBlockInfoEx* pDataBlockInfoEx, int32_t len);
void setExecutionContext(STableQuerySupportObj* pSupporter, SMeterQueryInfo* pMeterQueryInfo, int32_t meterIdx,
int32_t groupIdx, TSKEY nextKey);
int32_t setAdditionalInfo(STableQuerySupportObj *pSupporter, int32_t meterIdx, SMeterQueryInfo *pMeterQueryInfo);
void doGetAlignedIntervalQueryRangeImpl(SQuery* pQuery, int64_t pKey, int64_t keyFirst, int64_t keyLast,
int64_t* actualSkey, int64_t* actualEkey, int64_t* skey, int64_t* ekey);
int64_t getQueryStartPositionInCache(SQueryRuntimeEnv* pRuntimeEnv, int32_t* slot, int32_t* pos, bool ignoreQueryRange);
int32_t getDataBlocksForMeters(STableQuerySupportObj* pSupporter, SQuery* pQuery, int32_t numOfMeters,
const char* filePath, SMeterDataInfo** pMeterDataInfo, uint32_t* numOfBlocks);
int32_t LoadDatablockOnDemand(SCompBlock* pBlock, SField** pFields, uint8_t* blkStatus, SQueryRuntimeEnv* pRuntimeEnv,
int32_t fileIdx, int32_t slotIdx, __block_search_fn_t searchFn, bool onDemand);
int32_t vnodeGetHeaderFile(SQueryRuntimeEnv* pRuntimeEnv, int32_t fileIndex);
/**
* Create SMeterQueryInfo.
 * One SMeterQueryInfo is created for each table during a super table query
*
* @param skey
* @param ekey
* @return
*/
SMeterQueryInfo* createMeterQueryInfo(STableQuerySupportObj* pSupporter, int32_t sid, TSKEY skey, TSKEY ekey);
/**
* Destroy meter query info
* @param pMeterQInfo
* @param numOfCols
*/
void destroyMeterQueryInfo(SMeterQueryInfo* pMeterQueryInfo, int32_t numOfCols);
/**
* change the meter query info for supplement scan
* @param pMeterQueryInfo
* @param skey
* @param ekey
*/
void changeMeterQueryInfoForSuppleQuery(SQuery* pQuery, SMeterQueryInfo* pMeterQueryInfo,
TSKEY skey, TSKEY ekey);
/**
 * add the newly allocated disk page to the meter query info;
 * the newly allocated disk page is used to keep the intermediate (interval) results
* @param pQuery
* @param pMeterQueryInfo
* @param pSupporter
*/
tFilePage* addDataPageForMeterQueryInfo(SQuery* pQuery, SMeterQueryInfo* pMeterQueryInfo,
STableQuerySupportObj* pSupporter);
/**
* restore the query range data from SMeterQueryInfo to runtime environment
*
* @param pRuntimeEnv
* @param pMeterQueryInfo
*/
void restoreIntervalQueryRange(SQueryRuntimeEnv* pRuntimeEnv, SMeterQueryInfo* pMeterQueryInfo);
/**
* set the interval query range for the interval query, when handling a data(cache) block
*
* @param pMeterQueryInfo
* @param pSupporter
* @param key
*/
void setIntervalQueryRange(SMeterQueryInfo* pMeterQueryInfo, STableQuerySupportObj* pSupporter, int64_t key);
/**
* set the meter data information
* @param pMeterDataInfo
* @param pMeterObj current query meter object
* @param meterIdx meter index in the sid list
 * @param groupId group index to which the meter belongs
*/
void setMeterDataInfo(SMeterDataInfo* pMeterDataInfo, SMeterObj* pMeterObj, int32_t meterIdx, int32_t groupId);
void vnodeSetTagValueInParam(tSidSet* pSidSet, SQueryRuntimeEnv* pRuntimeEnv, SMeterSidExtInfo* pMeterInfo);
void vnodeCheckIfDataExists(SQueryRuntimeEnv* pRuntimeEnv, SMeterObj* pMeterObj, bool* dataInDisk, bool* dataInCache);
void displayInterResult(SData** pdata, SQuery* pQuery, int32_t numOfRows);
void vnodePrintQueryStatistics(STableQuerySupportObj* pSupporter);
void clearTimeWindowResBuf(SQueryRuntimeEnv* pRuntimeEnv, SWindowResult* pOneOutputRes);
void copyTimeWindowResBuf(SQueryRuntimeEnv* pRuntimeEnv, SWindowResult* dst, const SWindowResult* src);
int32_t initWindowResInfo(SWindowResInfo* pWindowResInfo, SQueryRuntimeEnv* pRuntimeEnv, int32_t size,
int32_t threshold, int16_t type);
void cleanupTimeWindowInfo(SWindowResInfo* pWindowResInfo, SQueryRuntimeEnv* pRuntimeEnv);
void resetTimeWindowInfo(SQueryRuntimeEnv* pRuntimeEnv, SWindowResInfo* pWindowResInfo);
void clearFirstNTimeWindow(SQueryRuntimeEnv *pRuntimeEnv, int32_t num);
void clearClosedTimeWindow(SQueryRuntimeEnv* pRuntimeEnv);
int32_t numOfClosedTimeWindow(SWindowResInfo* pWindowResInfo);
void closeTimeWindow(SWindowResInfo* pWindowResInfo, int32_t slot);
void closeAllTimeWindow(SWindowResInfo* pWindowResInfo);
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_VNODEQUERYIMPL_H
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_VNODEREAD_H
#define TDENGINE_VNODEREAD_H
#ifdef __cplusplus
extern "C" {
#endif
#include "os.h"
#include "qresultBuf.h"
#include "qinterpolation.h"
#include "vnodeTagMgmt.h"
/*
 * used to keep the first point position, consisting of the position in the block,
 * the block id, and the file id
*/
typedef struct {
int32_t pos;
int32_t slot;
int32_t fileId;
} SPositionInfo;
typedef struct SLoadDataBlockInfo {
int32_t fileListIndex; /* index of this file in files list of this vnode */
int32_t fileId;
int32_t slotIdx;
int32_t sid;
bool tsLoaded; // if timestamp column of current block is loaded or not
} SLoadDataBlockInfo;
typedef struct SLoadCompBlockInfo {
int32_t sid; /* meter sid */
int32_t fileId;
int32_t fileListIndex;
} SLoadCompBlockInfo;
/*
* the header file info for one vnode
*/
typedef struct SHeaderFileInfo {
int32_t fileID; // file id
} SHeaderFileInfo;
typedef struct SQueryCostSummary {
double cacheTimeUs;
double fileTimeUs;
int64_t numOfFiles; // opened files during query
  int64_t numOfTables;       // num of queried tables
int64_t numOfSeek; // number of seek operation
int64_t readDiskBlocks; // accessed disk block
int64_t skippedFileBlocks; // skipped blocks
int64_t blocksInCache; // accessed cache blocks
int64_t readField; // field size
int64_t totalFieldSize; // total read fields size
double loadFieldUs; // total elapsed time to read fields info
int64_t totalBlockSize; // read data blocks
double loadBlocksUs; // total elapsed time to read data blocks
int64_t totalGenData; // in-memory generated data
int64_t readCompInfo; // read compblock info
int64_t totalCompInfoSize; // total comp block size
double loadCompInfoUs; // total elapsed time to read comp block info
int64_t tmpBufferInDisk; // size of buffer for intermediate result
} SQueryCostSummary;
typedef struct SPosInfo {
int16_t pageId;
int16_t rowId;
} SPosInfo;
typedef struct STimeWindow {
TSKEY skey;
TSKEY ekey;
} STimeWindow;
typedef struct SWindowStatus {
bool closed;
} SWindowStatus;
typedef struct SWindowResult {
uint16_t numOfRows;
SPosInfo pos; // Position of current result in disk-based output buffer
SResultInfo* resultInfo; // For each result column, there is a resultInfo
STimeWindow window; // The time window that current result covers.
SWindowStatus status;
} SWindowResult;
/*
 * header file info, kept to avoid iterating the directory; the data is acquired
 * during the query preparation function
*/
typedef struct SQueryFilesInfo {
SHeaderFileInfo* pFileInfo;
uint32_t numOfFiles; // the total available number of files for this virtual node during query execution
int32_t current; // the memory mapped header file, NOTE: only one header file can be mmap.
int32_t vnodeId;
int32_t headerFd; // header file fd
int64_t headerFileSize;
int32_t dataFd;
int32_t lastFd;
char headerFilePath[PATH_MAX]; // current opened header file name
char dataFilePath[PATH_MAX]; // current opened data file name
char lastFilePath[PATH_MAX]; // current opened last file path
char dbFilePathPrefix[PATH_MAX];
} SQueryFilesInfo;
typedef struct SWindowResInfo {
SWindowResult* pResult; // reference to SQuerySupporter->pResult
void* hashList; // hash list for quick access
int16_t type; // data type for hash key
int32_t capacity; // max capacity
int32_t curIndex; // current start active index
int32_t size;
int64_t startTime; // start time of the first time window for sliding query
int64_t prevSKey; // previous (not completed) sliding window start key
int64_t threshold; // threshold for return completed results.
} SWindowResInfo;
typedef struct SQueryRuntimeEnv {
SPositionInfo startPos; /* the start position, used for secondary/third iteration */
SPositionInfo endPos; /* the last access position in query, served as the start pos of reversed order query */
SPositionInfo nextPos; /* start position of the next scan */
SData* colDataBuffer[TSDB_MAX_COLUMNS];
SResultInfo* resultInfo; // todo refactor to merge with SWindowResInfo
uint8_t blockStatus; // Indicate if data block is loaded, the block is first/last/internal block
int32_t unzipBufSize;
SData* primaryColBuffer;
char* unzipBuffer;
char* secondaryUnzipBuffer;
SQuery* pQuery;
SMeterObj* pMeterObj;
SQLFunctionCtx* pCtx;
SLoadDataBlockInfo loadBlockInfo; /* record current block load information */
SLoadCompBlockInfo loadCompBlockInfo; /* record current compblock information in SQuery */
SQueryFilesInfo vnodeFileInfo;
int16_t numOfRowsPerPage;
int16_t offset[TSDB_MAX_COLUMNS];
uint16_t scanFlag; // denotes reversed scan of data or not
SInterpolationInfo interpoInfo;
SData** pInterpoBuf;
SWindowResInfo windowResInfo;
STSBuf* pTSBuf;
STSCursor cur;
SQueryCostSummary summary;
bool stableQuery; // is super table query or not
SDiskbasedResultBuf* pResultBuf; // query result buffer based on blocked-wised disk file
/*
   * Temporarily hold the in-memory cache block info while scanning cache blocks.
   * Here we do not use the cache block info from pMeterObj, simply because it may change at any time
   * during the query by the submit/insert handling threads.
* So we keep a copy of the support structure as well as the cache block data itself.
*/
SCacheBlock cacheBlock;
} SQueryRuntimeEnv;
/* intermediate position used during a multi-meter query that involves an interval */
typedef struct SMeterQueryInfo {
int64_t lastKey;
int64_t skey;
int64_t ekey;
int32_t numOfRes;
int16_t queryRangeSet; // denote if the query range is set, only available for interval query
int64_t tag;
STSCursor cur;
int32_t sid; // for retrieve the page id list
SWindowResInfo windowResInfo;
} SMeterQueryInfo;
typedef struct SMeterDataInfo {
uint64_t offsetInHeaderFile;
int32_t numOfBlocks;
int32_t start; // start block index
SCompBlock* pBlock;
int32_t meterOrderIdx;
SMeterObj* pMeterObj;
int32_t groupIdx; // group id in meter list
SMeterQueryInfo* pMeterQInfo;
} SMeterDataInfo;
typedef struct STableQuerySupportObj {
void* pMetersHashTable; // meter table hash list
SMeterSidExtInfo** pMeterSidExtInfo;
int32_t numOfMeters;
/*
 * multi-meter query result set.
 * In multi-meter queries, the result is temporarily stored in this structure rather than
 * being put directly into the output buffer, since we have no idea how many rows may be
 * generated by a specific subgroup. Once the query on all subgroups has been executed,
 * the result is copied to the output buffer. This attribute is not used during single-meter
 * query processing.
 */
SQueryRuntimeEnv runtimeEnv;
int64_t rawSKey;
int64_t rawEKey;
int32_t subgroupIdx;
int32_t offset; /* offset in group result set of subgroup */
tSidSet* pSidSet;
/*
 * the position in the meter list at which the query is currently executing.
 * When the index reaches the last entry of the list, the query is completed.
 * We may later refactor to remove this attribute by using another flag to denote
 * whether a multi-meter query is completed or not.
 */
int32_t meterIdx;
int32_t numOfGroupResultPages;
int32_t groupResultSize;
SMeterDataInfo* pMeterDataInfo;
TSKEY* tsList;
} STableQuerySupportObj;
typedef struct _qinfo {
uint64_t signature;
int32_t refCount; // QInfo reference count, when the value is 0, it can be released safely
char user[TSDB_TABLE_ID_LEN + 1];
char sql[TSDB_SHOW_SQL_LEN];
uint8_t stream;
uint16_t port;
uint32_t ip;
uint64_t startTime;
int64_t useconds;
int killed;
struct _qinfo *prev, *next;
SQuery query;
int totalPoints;
int pointsRead;
int pointsReturned;
int pointsInterpo;
int code;
char bufIndex;
char changed;
char over;
SMeterObj* pObj;
sem_t dataReady;
STableQuerySupportObj* pTableQuerySupporter;
int (*fp)(SMeterObj*, SQuery*);
} SQInfo;
int32_t vnodeQueryTablePrepare(SQInfo* pQInfo, SMeterObj* pMeterObj, STableQuerySupportObj* pSMultiMeterObj,
void* param);
void vnodeQueryFreeQInfoEx(SQInfo* pQInfo);
bool vnodeParametersSafetyCheck(SQuery* pQuery);
int32_t vnodeSTableQueryPrepare(SQInfo* pQInfo, SQuery* pQuery, void* param);
/**
 * decrease the numofQuery of each table that is queried, so that the
 * remove/close operation can proceed
 * @param pQInfo
 */
void vnodeDecMeterRefcnt(SQInfo* pQInfo);
/* SQL query handling in dnode */
void vnodeSingleTableQuery(SSchedMsg* pMsg);
/*
* handle multi-meter query process
*/
void vnodeMultiMeterQuery(SSchedMsg* pMsg);
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_VNODEREAD_H
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_VNODESHELL_H
#define TDENGINE_VNODESHELL_H
#ifdef __cplusplus
extern "C" {
#endif
#include "os.h"
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_VNODESHELL_H
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_VNODESTORE_H
#define TDENGINE_VNODESTORE_H
#ifdef __cplusplus
extern "C" {
#endif
void vnodeProcessDataFromVnode(SIntMsg *msg, void *tcpHandle);
void vnodeCalcOpenVnodes();
bool vnodeRemoveDataFileFromLinkFile(char* linkFile, char* de_name);
int vnodeInitInfo();
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_VNODESTORE_H
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TBASE_MNODE_SUPER_TABLE_QUERY_H
#define TBASE_MNODE_SUPER_TABLE_QUERY_H
#include "os.h"
#include "mnode.h"
#include "qast.h"
int32_t mgmtDoJoin(SSuperTableMetaMsg* pSuperTableMetaMsg, tQueryResultset* pRes);
void mgmtReorganizeMetersInMetricMeta(SSuperTableMetaMsg* pInfo, int32_t index, tQueryResultset* pRes);
#endif
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_VNODESYSTEM_H
#define TDENGINE_VNODESYSTEM_H
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_VNODESYSTEM_H
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_VNODETAGMGMT_H
#define TDENGINE_VNODETAGMGMT_H
#ifdef __cplusplus
extern "C" {
#endif
/*
 * @version 0.1
 * @date 2018/01/02
 * @author liaohj
 * management of the tag values of tables.
 * In queries, the client needs the vnode to aggregate results according to tag
 * values; the grouping operation is done here.
 * Note:
 * 1. we implement a quick sort algorithm; it may be removed later.
 */
typedef int32_t (*__ext_compar_fn_t)(const void *p1, const void *p2, void *param);
tSidSet *tSidSetCreate(struct SMeterSidExtInfo **pMeterSidExtInfo, int32_t numOfMeters, SSchema *pSchema,
int32_t numOfTags, SColIndexEx *colList, int32_t numOfOrderCols);
int32_t *calculateSubGroup(void **pSids, int32_t numOfMeters, int32_t *numOfSubset, tOrderDescriptor *pOrderDesc,
__ext_compar_fn_t compareFn);
void tSidSetDestroy(tSidSet **pSets);
void tSidSetSort(tSidSet *pSets);
int32_t meterSidComparator(const void *s1, const void *s2, void *param);
int32_t doCompare(char *f1, char *f2, int32_t type, int32_t size);
void tQSortEx(void **pMeterSids, size_t size, int32_t start, int32_t end, void *param, __ext_compar_fn_t compareFn);
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_VNODETAGMGMT_H
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_VNODE_UTIL_H
#define TDENGINE_VNODE_UTIL_H
#ifdef __cplusplus
extern "C" {
#endif
/* get the qinfo struct address from the query struct address */
#define GET_COLUMN_BYTES(query, colidx) \
((query)->colList[(query)->pSelectExpr[colidx].pBase.colInfo.colIdxInBuf].data.bytes)
#define GET_COLUMN_TYPE(query, colidx) \
((query)->colList[(query)->pSelectExpr[colidx].pBase.colInfo.colIdxInBuf].data.type)
#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP)
#define EXTRA_BYTES 2 // for possible compression deflation
#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index)*(step))
int vnodeGetEid(int days);
int vnodeCheckFileIntegrity(FILE *fp);
void vnodeCreateFileHeader(FILE *fp);
void vnodeCreateFileHeaderFd(int fd);
void vnodeGetHeadFileHeaderInfo(int fd, SVnodeHeadInfo *pHeadInfo);
void vnodeUpdateHeadFileHeader(int fd, SVnodeHeadInfo *pHeadInfo);
/**
 * check whether two schemas are identical
 * This function does not check whether a schema is valid
*
* @param pSSchemaFirst
* @param numOfCols1
* @param pSSchemaSecond
* @param numOfCols2
* @return
*/
bool vnodeMeterSchemaIdentical(SColumn *pSchema1, int32_t numOfCols1, SColumn *pSchema2, int32_t numOfCols2);
/**
* free SFields in SQuery
* vnodeFreeFields must be called before free(pQuery->pBlock);
* @param pQuery
*/
void vnodeFreeFields(SQuery *pQuery);
void vnodeUpdateFilterColumnIndex(SQuery* pQuery);
void vnodeUpdateQueryColumnIndex(SQuery* pQuery, SMeterObj* pMeterObj);
int32_t vnodeCreateFilterInfo(void* pQInfo, SQuery *pQuery);
bool vnodeFilterData(SQuery* pQuery, int32_t* numOfActualRead, int32_t index);
bool vnodeDoFilterData(SQuery* pQuery, int32_t elemPos);
bool vnodeIsProjectionQuery(SSqlFunctionExpr *pExpr, int32_t numOfOutput);
int32_t vnodeIncQueryRefCount(SQueryMeterMsg *pQueryMsg, SMeterSidExtInfo **pSids, SMeterObj **pMeterObjList,
int32_t *numOfInc);
void vnodeDecQueryRefCount(SQueryMeterMsg *pQueryMsg, SMeterObj **pMeterObjList, int32_t numOfInc);
int32_t vnodeSetMeterState(SMeterObj* pMeterObj, int32_t state);
void vnodeClearMeterState(SMeterObj* pMeterObj, int32_t state);
bool vnodeIsMeterState(SMeterObj* pMeterObj, int32_t state);
void vnodeSetMeterDeleting(SMeterObj* pMeterObj);
int32_t vnodeSetMeterInsertImportStateEx(SMeterObj* pObj, int32_t st);
bool vnodeIsSafeToDeleteMeter(SVnodeObj* pVnode, int32_t sid);
void vnodeFreeColumnInfo(SColumnInfo* pColumnInfo);
bool isGroupbyNormalCol(SSqlGroupbyExpr* pExpr);
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_VNODE_UTIL_H
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "taosmsg.h"
#include "vnode.h"
#include "vnodeCache.h"
#include "vnodeUtil.h"
#include "vnodeStatus.h"
void vnodeSearchPointInCache(SMeterObj *pObj, SQuery *pQuery);
void vnodeProcessCommitTimer(void *param, void *tmrId);
void *vnodeOpenCachePool(int vnode) {
SCachePool *pCachePool;
SVnodeCfg * pCfg = &vnodeList[vnode].cfg;
int blockId = 0;
char * pMem = NULL;
pCachePool = (SCachePool *)malloc(sizeof(SCachePool));
if (pCachePool == NULL) {
dError("no memory to allocate cache pool!");
return NULL;
}
memset(pCachePool, 0, sizeof(SCachePool));
pCachePool->count = 1;
pCachePool->vnode = vnode;
pthread_mutex_init(&(pCachePool->vmutex), NULL);
size_t size = sizeof(char *) * pCfg->cacheNumOfBlocks.totalBlocks;
pCachePool->pMem = malloc(size);
if (pCachePool->pMem == NULL) {
dError("no memory to allocate cache blocks!");
pthread_mutex_destroy(&(pCachePool->vmutex));
tfree(pCachePool);
return NULL;
}
memset(pCachePool->pMem, 0, size);
pCachePool->threshold = pCfg->cacheNumOfBlocks.totalBlocks * 0.6;
int maxAllocBlock = (1024 * 1024 * 1024) / pCfg->cacheBlockSize;
if (maxAllocBlock < 1) {
dError("Cache block size is too large");
pthread_mutex_destroy(&(pCachePool->vmutex));
tfree(pCachePool->pMem);
tfree(pCachePool);
return NULL;
}
while (blockId < pCfg->cacheNumOfBlocks.totalBlocks) {
// TODO : Allocate real blocks
int allocBlocks = MIN(pCfg->cacheNumOfBlocks.totalBlocks - blockId, maxAllocBlock);
pMem = calloc(allocBlocks, pCfg->cacheBlockSize);
if (pMem == NULL) {
dError("failed to allocate cache memory: %d", allocBlocks*pCfg->cacheBlockSize);
goto _err_exit;
}
for (int i = 0; i < allocBlocks; i++) {
pCachePool->pMem[blockId] = pMem + i * pCfg->cacheBlockSize;
blockId++;
}
}
dPrint("vid:%d, cache pool is allocated:0x%x", vnode, pCachePool);
return pCachePool;
_err_exit:
pthread_mutex_destroy(&(pCachePool->vmutex));
// TODO : Free the cache blocks and return
blockId = 0;
while (blockId < pCfg->cacheNumOfBlocks.totalBlocks) {
tfree(pCachePool->pMem[blockId]);
blockId = blockId + (MIN(maxAllocBlock, pCfg->cacheNumOfBlocks.totalBlocks - blockId));
}
tfree(pCachePool->pMem);
tfree(pCachePool);
return NULL;
}
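/*
 * Illustrative sketch (not part of the original source): vnodeOpenCachePool() above
 * allocates the cache blocks in chunks of at most 1 GB each, so the number of calloc()
 * calls depends only on cacheBlockSize and totalBlocks. The hypothetical helper below
 * mirrors that arithmetic with plain integers.
 */
static int sketchNumOfAllocCalls(int totalBlocks, int cacheBlockSize) {
  int maxAllocBlock = (1024 * 1024 * 1024) / cacheBlockSize;  // blocks that fit in one 1 GB chunk
  if (maxAllocBlock < 1) return -1;                           // a block larger than 1 GB is rejected above
  return (totalBlocks + maxAllocBlock - 1) / maxAllocBlock;   // ceiling division: one calloc() per chunk
}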
void vnodeCloseCachePool(int vnode) {
SVnodeObj * pVnode = vnodeList + vnode;
SCachePool *pCachePool = (SCachePool *)pVnode->pCachePool;
int blockId = 0;
taosTmrStopA(&pVnode->commitTimer);
if (pVnode->commitInProcess) pthread_cancel(pVnode->commitThread);
dPrint("vid:%d, cache pool closed, count:%d", vnode, pCachePool->count);
int maxAllocBlock = (1024 * 1024 * 1024) / pVnode->cfg.cacheBlockSize;
while (blockId < pVnode->cfg.cacheNumOfBlocks.totalBlocks) {
tfree(pCachePool->pMem[blockId]);
blockId = blockId + (MIN(maxAllocBlock, pVnode->cfg.cacheNumOfBlocks.totalBlocks - blockId));
}
tfree(pCachePool->pMem);
pthread_mutex_destroy(&(pCachePool->vmutex));
tfree(pCachePool);
pVnode->pCachePool = NULL;
}
void *vnodeAllocateCacheInfo(SMeterObj *pObj) {
SCacheInfo *pInfo;
size_t size;
SVnodeCfg * pCfg = &vnodeList[pObj->vnode].cfg;
size = sizeof(SCacheInfo);
pInfo = (SCacheInfo *)malloc(size);
if (pInfo == NULL) {
dError("id:%s, no memory for cacheInfo", pObj->meterId);
return NULL;
}
memset(pInfo, 0, size);
pInfo->maxBlocks = vnodeList[pObj->vnode].cfg.blocksPerMeter;
size = sizeof(SCacheBlock *) * pInfo->maxBlocks;
pInfo->cacheBlocks = (SCacheBlock **)malloc(size);
if (pInfo->cacheBlocks == NULL) {
dError("id:%s, no memory for cacheBlocks", pObj->meterId);
tfree(pInfo);
return NULL;
}
memset(pInfo->cacheBlocks, 0, size);
pInfo->currentSlot = -1;
pObj->pointsPerBlock =
(pCfg->cacheBlockSize - sizeof(SCacheBlock) - pObj->numOfColumns * sizeof(char *)) / pObj->bytesPerPoint;
if (pObj->pointsPerBlock > pObj->pointsPerFileBlock) pObj->pointsPerBlock = pObj->pointsPerFileBlock;
pObj->pCache = (void *)pInfo;
pObj->freePoints = pObj->pointsPerBlock * pInfo->maxBlocks;
return (void *)pInfo;
}
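/*
 * Illustrative sketch (not part of the original source): pointsPerBlock above is the
 * cache block size minus the block header and the per-column offset pointers, divided
 * by the row size, capped later by pointsPerFileBlock. The hypothetical helper below
 * repeats the same arithmetic; all parameter values are assumptions for illustration.
 */
static int sketchPointsPerBlock(int cacheBlockSize, int blockHeaderSize, int numOfColumns, int bytesPerPoint) {
  return (cacheBlockSize - blockHeaderSize - numOfColumns * (int)sizeof(char *)) / bytesPerPoint;
}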
int vnodeFreeCacheBlock(SCacheBlock *pCacheBlock) {
SMeterObj * pObj;
SCacheInfo *pInfo;
if (pCacheBlock == NULL) return -1;
pObj = pCacheBlock->pMeterObj;
pInfo = (SCacheInfo *)pObj->pCache;
if (pObj) {
pInfo->numOfBlocks--;
if (pInfo->numOfBlocks < 0) {
dError("vid:%d sid:%d id:%s, numOfBlocks:%d shall never be negative", pObj->vnode, pObj->sid, pObj->meterId,
pInfo->numOfBlocks);
}
if (pCacheBlock->blockId == 0) {
dError("vid:%d sid:%d id:%s, double free", pObj->vnode, pObj->sid, pObj->meterId);
}
SCachePool *pPool = (SCachePool *)vnodeList[pObj->vnode].pCachePool;
if (pCacheBlock->notFree) {
pPool->notFreeSlots--;
pInfo->unCommittedBlocks--;
dTrace("vid:%d sid:%d id:%s, cache block is not free, slot:%d, index:%d notFreeSlots:%d",
pObj->vnode, pObj->sid, pObj->meterId, pCacheBlock->slot, pCacheBlock->index, pPool->notFreeSlots);
}
dTrace("vid:%d sid:%d id:%s, free a cache block, numOfBlocks:%d, slot:%d, index:%d notFreeSlots:%d",
pObj->vnode, pObj->sid, pObj->meterId, pInfo->numOfBlocks, pCacheBlock->slot, pCacheBlock->index,
pPool->notFreeSlots);
memset(pCacheBlock, 0, sizeof(SCacheBlock));
} else {
dError("BUG, pObj is null");
}
return 0;
}
void vnodeFreeCacheInfo(SMeterObj *pObj) {
SCacheInfo * pInfo;
SCacheBlock *pCacheBlock;
SCachePool * pPool;
int slot, numOfBlocks;
if (pObj == NULL || pObj->pCache == NULL) return;
pPool = (SCachePool *)vnodeList[pObj->vnode].pCachePool;
pInfo = (SCacheInfo *)pObj->pCache;
if (pPool == NULL || pInfo == NULL) return;
pthread_mutex_lock(&pPool->vmutex);
numOfBlocks = pInfo->numOfBlocks;
slot = pInfo->currentSlot;
for (int i = 0; i < numOfBlocks; ++i) {
pCacheBlock = pInfo->cacheBlocks[slot];
vnodeFreeCacheBlock(pCacheBlock);
slot = (slot - 1 + pInfo->maxBlocks) % pInfo->maxBlocks;
}
pObj->pCache = NULL;
tfree(pInfo->cacheBlocks);
tfree(pInfo);
pthread_mutex_unlock(&pPool->vmutex);
}
uint64_t vnodeGetPoolCount(SVnodeObj *pVnode) {
SCachePool *pPool;
pPool = (SCachePool *)pVnode->pCachePool;
return pPool->count;
}
void vnodeUpdateCommitInfo(SMeterObj *pObj, int slot, int pos, uint64_t count) {
SCacheInfo * pInfo;
SCacheBlock *pBlock;
SCachePool * pPool;
pInfo = (SCacheInfo *)pObj->pCache;
pPool = (SCachePool *)vnodeList[pObj->vnode].pCachePool;
int tslot =
(pInfo->commitPoint == pObj->pointsPerBlock) ? (pInfo->commitSlot + 1) % pInfo->maxBlocks : pInfo->commitSlot;
int slots = 0;
while (tslot != slot || ((tslot == slot) && (pos == pObj->pointsPerBlock))) {
slots++;
pthread_mutex_lock(&pPool->vmutex);
pBlock = pInfo->cacheBlocks[tslot];
assert(pBlock->notFree);
pBlock->notFree = 0;
pInfo->unCommittedBlocks--;
pPool->notFreeSlots--;
pthread_mutex_unlock(&pPool->vmutex);
dTrace("vid:%d sid:%d id:%s, cache block is committed, slot:%d, index:%d notFreeSlots:%d, unCommittedBlocks:%d",
pObj->vnode, pObj->sid, pObj->meterId, pBlock->slot, pBlock->index, pPool->notFreeSlots,
pInfo->unCommittedBlocks);
if (tslot == slot) break;
tslot = (tslot + 1) % pInfo->maxBlocks;
}
atomic_fetch_add_32(&pObj->freePoints, pObj->pointsPerBlock * slots);
pInfo->commitSlot = slot;
pInfo->commitPoint = pos;
pObj->commitCount = count;
}
TSKEY vnodeGetFirstKey(int vnode) {
SMeterObj * pObj;
SCacheInfo * pInfo;
SCacheBlock *pCacheBlock;
SVnodeCfg *pCfg = &vnodeList[vnode].cfg;
TSKEY key = taosGetTimestamp(pCfg->precision);
for (int sid = 0; sid < pCfg->maxSessions; ++sid) {
pObj = vnodeList[vnode].meterList[sid];
if (pObj == NULL || pObj->pCache == NULL) continue;
pInfo = (SCacheInfo *)pObj->pCache;
pCacheBlock = pInfo->cacheBlocks[0];
if (pCacheBlock == NULL || pCacheBlock->numOfPoints <= 0) continue;
if (*((TSKEY *)(pCacheBlock->offset[0])) < key) key = *((TSKEY *)(pCacheBlock->offset[0]));
}
return key;
}
pthread_t vnodeCreateCommitThread(SVnodeObj *pVnode) {
  // the pool mutex has to be locked before this function is called
pthread_attr_t thattr;
SCachePool * pPool = (SCachePool *)pVnode->pCachePool;
if (pPool->commitInProcess) {
dTrace("vid:%d, commit is already in process", pVnode->vnode);
return pVnode->commitThread;
}
taosTmrStopA(&pVnode->commitTimer);
if (pVnode->vnodeStatus == TSDB_VN_STATUS_UNSYNCED) {
taosTmrReset(vnodeProcessCommitTimer, pVnode->cfg.commitTime * 1000, pVnode, vnodeTmrCtrl, &pVnode->commitTimer);
dTrace("vid:%d, it is in unsyc state, commit later", pVnode->vnode);
return pVnode->commitThread;
}
pthread_attr_init(&thattr);
pthread_attr_setdetachstate(&thattr, PTHREAD_CREATE_DETACHED);
if (pthread_create(&(pVnode->commitThread), &thattr, vnodeCommitToFile, pVnode) != 0) {
dError("vid:%d, failed to create thread to commit file, reason:%s", pVnode->vnode, strerror(errno));
} else {
pPool->commitInProcess = 1;
dTrace("vid:%d, commit thread: 0x%lx is created", pVnode->vnode, pVnode->commitThread);
}
pthread_attr_destroy(&thattr);
return pVnode->commitThread;
}
void vnodeProcessCommitTimer(void *param, void *tmrId) {
SVnodeObj * pVnode = (SVnodeObj *)param;
SCachePool *pPool = (SCachePool *)pVnode->pCachePool;
pthread_mutex_lock(&pPool->vmutex);
vnodeCreateCommitThread(pVnode);
pthread_mutex_unlock(&pPool->vmutex);
}
void vnodeCommitOver(SVnodeObj *pVnode) {
SCachePool *pPool = (SCachePool *)(pVnode->pCachePool);
taosTmrReset(vnodeProcessCommitTimer, pVnode->cfg.commitTime * 1000, pVnode, vnodeTmrCtrl, &pVnode->commitTimer);
pthread_mutex_lock(&pPool->vmutex);
pPool->commitInProcess = 0;
dTrace("vid:%d, commit is over, notFreeSlots:%d", pPool->vnode, pPool->notFreeSlots);
pthread_mutex_unlock(&pPool->vmutex);
}
static void vnodeWaitForCommitComplete(SVnodeObj *pVnode) {
SCachePool *pPool = (SCachePool *)(pVnode->pCachePool);
  // wait for up to 10 seconds (1000 rounds of 10 ms)
const int32_t totalCount = 1000;
int32_t count = 0;
  // all meters are marked as dropped, so the commit will abort very quickly
while(count++ < totalCount) {
int32_t commitInProcess = 0;
pthread_mutex_lock(&pPool->vmutex);
commitInProcess = pPool->commitInProcess;
pthread_mutex_unlock(&pPool->vmutex);
    if (commitInProcess) {
      dWarn("vid:%d still in commit, wait for it to complete", pVnode->vnode);
      taosMsleep(10);
    } else {
      break;  // commit finished, stop waiting
    }
}
}
void vnodeCancelCommit(SVnodeObj *pVnode) {
SCachePool *pPool = (SCachePool *)(pVnode->pCachePool);
if (pPool == NULL) return;
vnodeWaitForCommitComplete(pVnode);
taosTmrReset(vnodeProcessCommitTimer, pVnode->cfg.commitTime * 1000, pVnode, vnodeTmrCtrl, &pVnode->commitTimer);
}
/* The vnode cache lock should be held before calling this interface */
SCacheBlock *vnodeGetFreeCacheBlock(SVnodeObj *pVnode) {
SCachePool *pPool = (SCachePool *)(pVnode->pCachePool);
SVnodeCfg *pCfg = &(pVnode->cfg);
SCacheBlock *pCacheBlock = NULL;
int skipped = 0;
while (1) {
pCacheBlock = (SCacheBlock *)(pPool->pMem[((int64_t)pPool->freeSlot)]);
if (pCacheBlock->blockId == 0) break;
if (pCacheBlock->notFree) {
pPool->freeSlot++;
pPool->freeSlot = pPool->freeSlot % pCfg->cacheNumOfBlocks.totalBlocks;
skipped++;
if (skipped > pPool->threshold) {
vnodeCreateCommitThread(pVnode);
pthread_mutex_unlock(&pPool->vmutex);
dError("vid:%d committing process is too slow, notFreeSlots:%d....", pVnode->vnode, pPool->notFreeSlots);
return NULL;
}
} else {
SMeterObj * pRelObj = pCacheBlock->pMeterObj;
SCacheInfo *pRelInfo = (SCacheInfo *)pRelObj->pCache;
int firstSlot = (pRelInfo->currentSlot - pRelInfo->numOfBlocks + 1 + pRelInfo->maxBlocks) % pRelInfo->maxBlocks;
pCacheBlock = pRelInfo->cacheBlocks[firstSlot];
if (pCacheBlock) {
pPool->freeSlot = pCacheBlock->index;
vnodeFreeCacheBlock(pCacheBlock);
break;
} else {
pPool->freeSlot = (pPool->freeSlot + 1) % pCfg->cacheNumOfBlocks.totalBlocks;
skipped++;
}
}
}
pCacheBlock = (SCacheBlock *)(pPool->pMem[pPool->freeSlot]);
pCacheBlock->index = pPool->freeSlot;
pCacheBlock->notFree = 1;
pPool->freeSlot = (pPool->freeSlot + 1) % pCfg->cacheNumOfBlocks.totalBlocks;
pPool->notFreeSlots++;
return pCacheBlock;
}
int vnodeAllocateCacheBlock(SMeterObj *pObj) {
int index;
SCachePool * pPool;
SCacheBlock *pCacheBlock;
SCacheInfo * pInfo;
SVnodeObj * pVnode;
int commit = 0;
pVnode = vnodeList + pObj->vnode;
pPool = (SCachePool *)pVnode->pCachePool;
pInfo = (SCacheInfo *)pObj->pCache;
SVnodeCfg *pCfg = &(vnodeList[pObj->vnode].cfg);
if (pPool == NULL) return -1;
pthread_mutex_lock(&pPool->vmutex);
if (pInfo == NULL || pInfo->cacheBlocks == NULL) {
pthread_mutex_unlock(&pPool->vmutex);
dError("vid:%d sid:%d id:%s, meter is not there", pObj->vnode, pObj->sid, pObj->meterId);
return -1;
}
if (pPool->count <= 1) {
if (pVnode->commitTimer == NULL)
pVnode->commitTimer = taosTmrStart(vnodeProcessCommitTimer, pCfg->commitTime * 1000, pVnode, vnodeTmrCtrl);
}
if (pInfo->unCommittedBlocks >= pInfo->maxBlocks-1) {
vnodeCreateCommitThread(pVnode);
pthread_mutex_unlock(&pPool->vmutex);
dError("vid:%d sid:%d id:%s, all blocks are not committed yet....", pObj->vnode, pObj->sid, pObj->meterId);
return -1;
}
if ((pCacheBlock = vnodeGetFreeCacheBlock(pVnode)) == NULL) return -1;
index = pCacheBlock->index;
pCacheBlock->pMeterObj = pObj;
pCacheBlock->offset[0] = ((char *)(pCacheBlock)) + sizeof(SCacheBlock) + pObj->numOfColumns * sizeof(char *);
for (int col = 1; col < pObj->numOfColumns; ++col)
pCacheBlock->offset[col] = pCacheBlock->offset[col - 1] + pObj->schema[col - 1].bytes * pObj->pointsPerBlock;
pInfo->numOfBlocks++;
pInfo->blocks++;
pInfo->unCommittedBlocks++;
pInfo->currentSlot = (pInfo->currentSlot + 1) % pInfo->maxBlocks;
pCacheBlock->blockId = pInfo->blocks;
pCacheBlock->slot = pInfo->currentSlot;
if (pInfo->numOfBlocks > pInfo->maxBlocks) {
pCacheBlock = pInfo->cacheBlocks[pInfo->currentSlot];
vnodeFreeCacheBlock(pCacheBlock);
}
pInfo->cacheBlocks[pInfo->currentSlot] = (SCacheBlock *)(pPool->pMem[(int64_t)index]);
dTrace("vid:%d sid:%d id:%s, allocate a cache block, numOfBlocks:%d, slot:%d, index:%d notFreeSlots:%d blocks:%d",
pObj->vnode, pObj->sid, pObj->meterId, pInfo->numOfBlocks, pInfo->currentSlot, index, pPool->notFreeSlots,
pInfo->blocks);
if (((pPool->notFreeSlots > pPool->threshold) || (pInfo->unCommittedBlocks >= pInfo->maxBlocks / 2))) {
dTrace("vid:%d sid:%d id:%s, too many unCommitted slots, unCommitted:%d notFreeSlots:%d",
pObj->vnode, pObj->sid, pObj->meterId, pInfo->unCommittedBlocks, pPool->notFreeSlots);
vnodeCreateCommitThread(pVnode);
commit = 1;
}
pthread_mutex_unlock(&pPool->vmutex);
return commit;
}
int vnodeInsertPointToCache(SMeterObj *pObj, char *pData) {
SCacheBlock *pCacheBlock;
SCacheInfo * pInfo;
SCachePool * pPool;
pInfo = (SCacheInfo *)pObj->pCache;
pPool = (SCachePool *)vnodeList[pObj->vnode].pCachePool;
if (pInfo->numOfBlocks == 0) {
if (vnodeAllocateCacheBlock(pObj) < 0) {
return -1;
}
}
if (pInfo->currentSlot < 0) return -1;
pCacheBlock = pInfo->cacheBlocks[pInfo->currentSlot];
if (pCacheBlock->numOfPoints >= pObj->pointsPerBlock) {
if (vnodeAllocateCacheBlock(pObj) < 0) return -1;
pCacheBlock = pInfo->cacheBlocks[pInfo->currentSlot];
}
for (int col = 0; col < pObj->numOfColumns; ++col) {
memcpy(pCacheBlock->offset[col] + pCacheBlock->numOfPoints * pObj->schema[col].bytes, pData,
pObj->schema[col].bytes);
pData += pObj->schema[col].bytes;
}
atomic_fetch_sub_32(&pObj->freePoints, 1);
pCacheBlock->numOfPoints++;
pPool->count++;
return 0;
}
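/*
 * Illustrative sketch (not part of the original source): inside a cache block the data
 * is stored column-wise. vnodeAllocateCacheBlock() points offset[col] at the start of
 * each column region, and vnodeInsertPointToCache() appends one value per column at the
 * current row position. The hypothetical helper below reproduces the address arithmetic;
 * colBytes[] stands in for the per-column sizes taken from the schema.
 */
static char *sketchCellAddr(char *blockBase, int blockHeaderSize, int numOfColumns,
                            const int *colBytes, int pointsPerBlock, int col, int row) {
  char *p = blockBase + blockHeaderSize + numOfColumns * (int64_t)sizeof(char *);  // start of column 0
  for (int c = 0; c < col; ++c) p += (int64_t)colBytes[c] * pointsPerBlock;        // skip earlier column regions
  return p + (int64_t)row * colBytes[col];                                         // row within this column region
}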
void vnodeUpdateQuerySlotPos(SCacheInfo *pInfo, SQuery *pQuery) {
SCacheBlock *pCacheBlock;
int step = QUERY_IS_ASC_QUERY(pQuery) ? -1 : 1;
if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->slot == pQuery->currentSlot)) ||
(!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->slot == pQuery->firstSlot))) {
pQuery->over = 1;
} else {
pQuery->slot = (pQuery->slot - step + pInfo->maxBlocks) % pInfo->maxBlocks;
pCacheBlock = pInfo->cacheBlocks[pQuery->slot];
pQuery->pos = QUERY_IS_ASC_QUERY(pQuery) ? 0 : pCacheBlock->numOfPoints - 1;
}
}
static FORCE_INLINE TSKEY vnodeGetTSInCacheBlock(SCacheBlock *pCacheBlock, int32_t pos) {
return *(TSKEY *)(pCacheBlock->offset[PRIMARYKEY_TIMESTAMP_COL_INDEX] + pos * TSDB_KEYSIZE);
}
int vnodeQueryFromCache(SMeterObj *pObj, SQuery *pQuery) {
SCacheBlock *pCacheBlock;
int col, step;
char * pRead, *pData;
SCacheInfo * pInfo;
int lastPos = -1;
int startPos, numOfReads, numOfPoints;
pQuery->pointsRead = 0;
if (pQuery->over) return 0;
vnodeFreeFields(pQuery);
pInfo = (SCacheInfo *)pObj->pCache;
if ((pInfo == NULL) || (pInfo->numOfBlocks == 0)) {
pQuery->over = 1;
return 0;
}
if (pQuery->slot < 0 || pQuery->pos < 0) // it means a new query, we need to find the point first
vnodeSearchPointInCache(pObj, pQuery);
if (pQuery->slot < 0 || pQuery->pos < 0) {
pQuery->over = 1;
return 0;
}
step = QUERY_IS_ASC_QUERY(pQuery) ? -1 : 1;
pCacheBlock = pInfo->cacheBlocks[pQuery->slot];
numOfPoints = pCacheBlock->numOfPoints;
int maxReads = QUERY_IS_ASC_QUERY(pQuery) ? numOfPoints - pQuery->pos : pQuery->pos + 1;
if (maxReads <= 0) {
vnodeUpdateQuerySlotPos(pInfo, pQuery);
return 0;
}
TSKEY startkey = vnodeGetTSInCacheBlock(pCacheBlock, 0);
TSKEY endkey = vnodeGetTSInCacheBlock(pCacheBlock, numOfPoints - 1);
if (QUERY_IS_ASC_QUERY(pQuery)) {
if (endkey < pQuery->ekey) {
numOfReads = maxReads;
} else {
lastPos = (*vnodeSearchKeyFunc[pObj->searchAlgorithm])(
pCacheBlock->offset[PRIMARYKEY_TIMESTAMP_COL_INDEX] + TSDB_KEYSIZE * pQuery->pos, maxReads, pQuery->ekey, 0);
numOfReads = (lastPos >= 0) ? lastPos + 1 : 0;
}
} else {
if (startkey > pQuery->ekey) {
numOfReads = maxReads;
} else {
lastPos = (*vnodeSearchKeyFunc[pObj->searchAlgorithm])(pCacheBlock->offset[PRIMARYKEY_TIMESTAMP_COL_INDEX],
maxReads, pQuery->ekey, 1);
numOfReads = (lastPos >= 0) ? pQuery->pos - lastPos + 1 : 0;
}
}
if (numOfReads > pQuery->pointsToRead - pQuery->pointsRead) {
numOfReads = pQuery->pointsToRead - pQuery->pointsRead;
} else {
if (lastPos >= 0 || numOfReads == 0) {
pQuery->keyIsMet = 1;
pQuery->over = 1;
}
}
startPos = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos : pQuery->pos - numOfReads + 1;
int32_t numOfQualifiedPoints = 0;
int32_t numOfActualRead = numOfReads;
if (pQuery->numOfFilterCols == 0) {
for (col = 0; col < pQuery->numOfOutputCols; ++col) {
int16_t colIdx = pQuery->pSelectExpr[col].pBase.colInfo.colIdx;
int16_t bytes = GET_COLUMN_BYTES(pQuery, col);
int16_t type = GET_COLUMN_TYPE(pQuery, col);
pData = pQuery->sdata[col]->data + pQuery->pointsOffset * bytes;
      /* this column is absent from the current block, fill it with null values */
if (colIdx < 0 || colIdx >= pObj->numOfColumns ||
pObj->schema[colIdx].colId != pQuery->pSelectExpr[col].pBase.colInfo.colId) { // set null
setNullN(pData, type, bytes, pCacheBlock->numOfPoints);
} else {
pRead = pCacheBlock->offset[colIdx] + startPos * bytes;
if (QUERY_IS_ASC_QUERY(pQuery)) {
memcpy(pData, pRead, numOfReads * bytes);
} else {
for(int32_t j = 0; j < numOfReads; ++j) {
memcpy(pData + bytes * j, pRead + (numOfReads - 1 - j) * bytes, bytes);
}
}
}
}
numOfQualifiedPoints = numOfReads;
} else { // check each data one by one
// set the input column data
for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
int16_t colIdx = pQuery->pFilterInfo[k].info.colIdx;
if (colIdx < 0) { // current data has not specified column
pQuery->pFilterInfo[k].pData = NULL;
} else {
pQuery->pFilterInfo[k].pData = pCacheBlock->offset[colIdx];
}
}
int32_t *ids = calloc(1, numOfReads * sizeof(int32_t));
numOfActualRead = 0;
if (QUERY_IS_ASC_QUERY(pQuery)) {
for (int32_t j = startPos; j < pCacheBlock->numOfPoints; ++j) {
TSKEY key = vnodeGetTSInCacheBlock(pCacheBlock, j);
if (key < startkey || key > endkey) {
dError("vid:%d sid:%d id:%s, timestamp in cache slot is disordered. slot:%d, pos:%d, ts:%" PRId64 ", block "
"range:%" PRId64 "-%" PRId64, pObj->vnode, pObj->sid, pObj->meterId, pQuery->slot, j, key, startkey, endkey);
tfree(ids);
return -TSDB_CODE_FILE_BLOCK_TS_DISORDERED;
}
if (key > pQuery->ekey) {
break;
}
if (!vnodeFilterData(pQuery, &numOfActualRead, j)) {
continue;
}
ids[numOfQualifiedPoints] = j;
if (++numOfQualifiedPoints == numOfReads) { // qualified data are enough
break;
}
}
} else {
startPos = pQuery->pos;
for (int32_t j = startPos; j >= 0; --j) {
TSKEY key = vnodeGetTSInCacheBlock(pCacheBlock, j);
if (key < startkey || key > endkey) {
dError("vid:%d sid:%d id:%s, timestamp in cache slot is disordered. slot:%d, pos:%d, ts:%" PRId64 ", block "
"range:%" PRId64 "-%" PRId64, pObj->vnode, pObj->sid, pObj->meterId, pQuery->slot, j, key, startkey, endkey);
tfree(ids);
return -TSDB_CODE_FILE_BLOCK_TS_DISORDERED;
}
if (key < pQuery->ekey) {
break;
}
if (!vnodeFilterData(pQuery, &numOfActualRead, j)) {
continue;
}
ids[numOfQualifiedPoints] = j;
if (++numOfQualifiedPoints == numOfReads) { // qualified data are enough
break;
}
}
}
// int32_t start = QUERY_IS_ASC_QUERY(pQuery) ? 0 : numOfReads - numOfQualifiedPoints;
for (int32_t j = 0; j < numOfQualifiedPoints; ++j) {
for (int32_t col = 0; col < pQuery->numOfOutputCols; ++col) {
int16_t colIndex = pQuery->pSelectExpr[col].pBase.colInfo.colIdx;
int32_t bytes = pObj->schema[colIndex].bytes;
pData = pQuery->sdata[col]->data + (pQuery->pointsOffset + j) * bytes;
pRead = pCacheBlock->offset[colIndex] + ids[j/* + start*/] * bytes;
memcpy(pData, pRead, bytes);
}
}
tfree(ids);
assert(numOfQualifiedPoints <= numOfReads);
}
pQuery->pointsRead += numOfQualifiedPoints;
pQuery->pos -= numOfActualRead * step;
// update the skey/lastkey
int32_t lastAccessPos = pQuery->pos + step;
pQuery->lastKey = vnodeGetTSInCacheBlock(pCacheBlock, lastAccessPos);
pQuery->skey = pQuery->lastKey - step;
int update = 0; // go to next slot after this round
if ((pQuery->pos < 0 || pQuery->pos >= pObj->pointsPerBlock || numOfReads == 0) && (pQuery->over == 0)) update = 1;
  // if the block has been changed, it shall be thrown away; this will not happen during commit
if (pObj != pCacheBlock->pMeterObj || pCacheBlock->blockId > pQuery->blockId) {
update = 1;
pQuery->pointsRead = 0;
dWarn("vid:%d sid:%d id:%s, cache block is overwritten, slot:%d blockId:%d qBlockId:%d",
pObj->vnode, pObj->sid, pObj->meterId, pQuery->slot, pCacheBlock->blockId, pQuery->blockId);
}
if (update) vnodeUpdateQuerySlotPos(pInfo, pQuery);
for (col = 0; col < pQuery->numOfOutputCols; ++col) {
int16_t bytes = GET_COLUMN_BYTES(pQuery, col);
pQuery->sdata[col]->len = bytes * (pQuery->pointsRead + pQuery->pointsOffset);
}
return pQuery->pointsRead;
}
void vnodeSearchPointInCache(SMeterObj *pObj, SQuery *pQuery) {
int numOfBlocks;
int firstSlot, lastSlot, midSlot;
TSKEY keyFirst, keyLast;
SCacheBlock *pBlock;
SCacheInfo * pInfo = (SCacheInfo *)pObj->pCache;
SCachePool * pPool = (SCachePool *)vnodeList[pObj->vnode].pCachePool;
pQuery->slot = -1;
pQuery->pos = -1;
  // save these variables first, in case they are changed by a write operation
pthread_mutex_lock(&pPool->vmutex);
numOfBlocks = pInfo->numOfBlocks;
lastSlot = pInfo->currentSlot;
pthread_mutex_unlock(&pPool->vmutex);
if (numOfBlocks <= 0) return;
firstSlot = (lastSlot - numOfBlocks + 1 + pInfo->maxBlocks) % pInfo->maxBlocks;
// make sure it is there, otherwise, return right away
pBlock = pInfo->cacheBlocks[firstSlot];
keyFirst = vnodeGetTSInCacheBlock(pBlock, 0);
pBlock = pInfo->cacheBlocks[lastSlot];
keyLast = vnodeGetTSInCacheBlock(pBlock, pBlock->numOfPoints - 1);
pQuery->blockId = pBlock->blockId;
pQuery->currentSlot = lastSlot;
pQuery->numOfBlocks = numOfBlocks;
pQuery->firstSlot = firstSlot;
if (!QUERY_IS_ASC_QUERY(pQuery)) {
if (pQuery->skey < keyFirst) return;
if (pQuery->ekey > keyLast) return;
} else {
if (pQuery->skey > keyLast) return;
if (pQuery->ekey < keyFirst) return;
}
while (1) {
numOfBlocks = (lastSlot - firstSlot + 1 + pInfo->maxBlocks) % pInfo->maxBlocks;
if (numOfBlocks == 0) numOfBlocks = pInfo->maxBlocks;
midSlot = (firstSlot + (numOfBlocks >> 1)) % pInfo->maxBlocks;
pBlock = pInfo->cacheBlocks[midSlot];
keyFirst = vnodeGetTSInCacheBlock(pBlock, 0);
keyLast = vnodeGetTSInCacheBlock(pBlock, pBlock->numOfPoints - 1);
if (numOfBlocks == 1) break;
if (pQuery->skey > keyLast) {
if (numOfBlocks == 2) break;
if (!QUERY_IS_ASC_QUERY(pQuery)) {
int nextSlot = (midSlot + 1 + pInfo->maxBlocks) % pInfo->maxBlocks;
SCacheBlock *pNextBlock = pInfo->cacheBlocks[nextSlot];
TSKEY nextKeyFirst = vnodeGetTSInCacheBlock(pNextBlock, 0);
if (pQuery->skey < nextKeyFirst) break;
}
firstSlot = (midSlot + 1) % pInfo->maxBlocks;
} else if (pQuery->skey < keyFirst) {
if (QUERY_IS_ASC_QUERY(pQuery)) {
int prevSlot = (midSlot - 1 + pInfo->maxBlocks) % pInfo->maxBlocks;
SCacheBlock *pPrevBlock = pInfo->cacheBlocks[prevSlot];
TSKEY prevKeyLast = vnodeGetTSInCacheBlock(pPrevBlock, pPrevBlock->numOfPoints - 1);
if (pQuery->skey > prevKeyLast) break;
}
lastSlot = (midSlot - 1 + pInfo->maxBlocks) % pInfo->maxBlocks;
} else {
break; // got the slot
}
}
pQuery->slot = midSlot;
if (!QUERY_IS_ASC_QUERY(pQuery)) {
if (pQuery->skey < keyFirst) return;
if (pQuery->ekey > keyLast) {
pQuery->slot = (midSlot + 1 + pInfo->maxBlocks) % pInfo->maxBlocks;
return;
}
} else {
if (pQuery->skey > keyLast) {
pQuery->slot = (midSlot + 1 + pInfo->maxBlocks) % pInfo->maxBlocks;
return;
}
if (pQuery->ekey < keyFirst) return;
}
  // midSlot and pBlock are the search result
pBlock = pInfo->cacheBlocks[midSlot];
pQuery->pos = (*vnodeSearchKeyFunc[pObj->searchAlgorithm])(pBlock->offset[0], pBlock->numOfPoints, pQuery->skey,
pQuery->order.order);
pQuery->key = vnodeGetTSInCacheBlock(pBlock, pQuery->pos);
if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) {
int maxReads = QUERY_IS_ASC_QUERY(pQuery) ? pBlock->numOfPoints - pQuery->pos : pQuery->pos + 1;
if (pQuery->limit.offset < maxReads) { // start position in current block
if (QUERY_IS_ASC_QUERY(pQuery)) {
pQuery->pos += pQuery->limit.offset;
} else {
pQuery->pos -= pQuery->limit.offset;
}
pQuery->key = vnodeGetTSInCacheBlock(pBlock, pQuery->pos);
pQuery->limit.offset = 0;
} else if (pInfo->numOfBlocks == 1) {
pQuery->pos = -1; // no qualified data
} else {
int step = QUERY_IS_ASC_QUERY(pQuery) ? 1 : -1;
pQuery->limit.offset -= maxReads;
midSlot = (midSlot + step + pInfo->maxBlocks) % pInfo->maxBlocks;
bool hasData = true;
while (pQuery->limit.offset > pInfo->cacheBlocks[midSlot]->numOfPoints) {
pQuery->limit.offset -= pInfo->cacheBlocks[midSlot]->numOfPoints;
if ((QUERY_IS_ASC_QUERY(pQuery) && midSlot == pQuery->currentSlot) ||
(!QUERY_IS_ASC_QUERY(pQuery) && midSlot == pQuery->firstSlot)) { // no qualified data in cache
hasData = false;
break;
}
midSlot = (midSlot + step + pInfo->maxBlocks) % pInfo->maxBlocks;
}
if (hasData) {
if (QUERY_IS_ASC_QUERY(pQuery)) {
pQuery->pos = pQuery->limit.offset;
} else {
pQuery->pos = pInfo->cacheBlocks[midSlot]->numOfPoints - pQuery->limit.offset - 1;
}
pQuery->limit.offset = 0;
pQuery->slot = midSlot;
pQuery->key = vnodeGetTSInCacheBlock(pInfo->cacheBlocks[midSlot], pQuery->pos);
} else {
pQuery->pos = -1; // no qualified data
pBlock = pInfo->cacheBlocks[midSlot];
if (QUERY_IS_ASC_QUERY(pQuery)) {
pQuery->lastKey = vnodeGetTSInCacheBlock(pBlock, pBlock->numOfPoints - 1);
pQuery->skey = pQuery->lastKey + 1;
} else {
pQuery->lastKey = vnodeGetTSInCacheBlock(pBlock, 0);
pQuery->skey = pQuery->lastKey - 1;
}
}
}
}
return;
}
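/*
 * Illustrative sketch (not part of the original source): vnodeSearchPointInCache() above
 * runs a binary search over a circular window of cache slots. The hypothetical helper
 * below shows the modular arithmetic used to get the window length and the physical slot
 * of its midpoint, matching the loop body above.
 */
static int sketchMidSlot(int firstSlot, int lastSlot, int maxBlocks) {
  int numOfBlocks = (lastSlot - firstSlot + 1 + maxBlocks) % maxBlocks;  // window length in slots
  if (numOfBlocks == 0) numOfBlocks = maxBlocks;                         // a result of 0 means the window wraps fully
  return (firstSlot + (numOfBlocks >> 1)) % maxBlocks;                   // physical slot of the midpoint
}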
void vnodeSetCommitQuery(SMeterObj *pObj, SQuery *pQuery) {
SCacheInfo *pInfo = (SCacheInfo *)pObj->pCache;
SCachePool *pPool = (SCachePool *)vnodeList[pObj->vnode].pCachePool;
SVnodeObj * pVnode = vnodeList + pObj->vnode;
pQuery->order.order = TSQL_SO_ASC;
pQuery->numOfCols = pObj->numOfColumns;
pQuery->numOfOutputCols = pObj->numOfColumns;
for (int16_t col = 0; col < pObj->numOfColumns; ++col) {
pQuery->colList[col].colIdxInBuf = col;
pQuery->colList[col].data.colId = pObj->schema[col].colId;
pQuery->colList[col].data.bytes = pObj->schema[col].bytes;
pQuery->colList[col].data.type = pObj->schema[col].type;
SColIndexEx *pColIndexEx = &pQuery->pSelectExpr[col].pBase.colInfo;
pColIndexEx->colId = pObj->schema[col].colId;
pColIndexEx->colIdx = col;
pColIndexEx->colIdxInBuf = col;
pColIndexEx->flag = TSDB_COL_NORMAL;
}
pQuery->slot = pInfo->commitSlot;
pQuery->pos = pInfo->commitPoint;
pQuery->over = 0;
pthread_mutex_lock(&pPool->vmutex);
pQuery->currentSlot = pInfo->currentSlot;
pQuery->numOfBlocks = pInfo->numOfBlocks;
pthread_mutex_unlock(&pPool->vmutex);
if (pQuery->numOfBlocks <= 0 || pQuery->firstSlot < 0) {
pQuery->over = 1;
return;
}
pQuery->firstSlot = (pQuery->currentSlot - pQuery->numOfBlocks + 1 + pInfo->maxBlocks) % pInfo->maxBlocks;
pQuery->blockId = pInfo->cacheBlocks[pQuery->currentSlot]->blockId;
SCacheBlock *pCacheBlock;
pCacheBlock = pInfo->cacheBlocks[pInfo->commitSlot];
if (pInfo->commitSlot == pQuery->currentSlot && pInfo->commitPoint == pCacheBlock->numOfPoints) {
dTrace("vid:%d sid:%d id:%s, no new data to commit", pObj->vnode, pObj->sid, pObj->meterId);
pQuery->over = 1;
return;
}
if (pQuery->pos == pObj->pointsPerBlock) {
pQuery->slot = (pQuery->slot + 1) % pInfo->maxBlocks;
pQuery->pos = 0;
}
pCacheBlock = pInfo->cacheBlocks[pQuery->slot];
TSKEY firstKey = *((TSKEY *)(pCacheBlock->offset[0] + pQuery->pos * pObj->schema[0].bytes));
if (firstKey < pQuery->skey) {
pQuery->over = 1;
dTrace("vid:%d sid:%d id:%s, first key is small, keyFirst:%" PRId64 " commitFirstKey:%" PRId64 "",
pObj->vnode, pObj->sid, pObj->meterId, firstKey, pQuery->skey);
pthread_mutex_lock(&(pVnode->vmutex));
if (firstKey < pVnode->firstKey) pVnode->firstKey = firstKey;
assert(pVnode->firstKey > 0);
pthread_mutex_unlock(&(pVnode->vmutex));
}
}
int vnodeSyncRetrieveVnodeStatistic(int vnode, int fd) {
SVnodeObj *pVnode = vnodeList + vnode;
if (taosWriteMsg(fd, &(pVnode->vnodeStatistic.pointsWritten), sizeof(int64_t)) < 0) return -1;
if (taosWriteMsg(fd, &(pVnode->vnodeStatistic.totalStorage), sizeof(int64_t)) < 0) return -1;
if (taosWriteMsg(fd, &(pVnode->vnodeStatistic.compStorage), sizeof(int64_t)) < 0) return -1;
return 0;
}
int vnodeSyncRestoreVnodeStatistic(int vnode, int fd) {
SVnodeObj *pVnode = vnodeList + vnode;
if (taosReadMsg(fd, &(pVnode->vnodeStatistic.pointsWritten), sizeof(int64_t)) < 0) return -1;
if (taosReadMsg(fd, &(pVnode->vnodeStatistic.totalStorage), sizeof(int64_t)) < 0) return -1;
if (taosReadMsg(fd, &(pVnode->vnodeStatistic.compStorage), sizeof(int64_t)) < 0) return -1;
return 0;
}
int vnodeSyncRetrieveCache(int vnode, int fd) {
int32_t sid, slot, points;
SVnodeObj * pVnode;
SMeterObj * pObj;
SCacheInfo * pInfo;
SCacheBlock *pBlock;
int blocksSent, pointsSent;
pVnode = vnodeList + vnode;
points = 0;
SVnodeCfg *pCfg = &vnodeList[vnode].cfg;
for (sid = 0; sid < pCfg->maxSessions; ++sid) {
pObj = pVnode->meterList[sid];
if (pObj == NULL) continue;
pInfo = (SCacheInfo *)pObj->pCache;
if (pInfo == NULL) continue;
// write sid first
if (taosWriteMsg(fd, &sid, sizeof(sid)) <= 0) return -1;
if (taosWriteMsg(fd, &(pObj->lastKey), sizeof(pObj->lastKey)) <= 0) return -1;
if (taosWriteMsg(fd, &(pObj->lastKeyOnFile), sizeof(pObj->lastKeyOnFile)) <= 0) return -1;
if (taosWriteMsg(fd, &(pInfo->commitPoint), sizeof(pInfo->commitPoint)) <= 0) return -1;
dTrace("vid:%d sid:%d id:%s, send lastKey:%" PRId64 " lastKeyOnFile:%" PRId64, vnode, sid, pObj->meterId, pObj->lastKey,
pObj->lastKeyOnFile);
slot = pInfo->commitSlot;
blocksSent = 0;
pointsSent = 0;
while (pInfo->numOfBlocks > 0) {
pBlock = pInfo->cacheBlocks[slot];
if (pBlock->numOfPoints == 0) break;
// write the number of points
points = pBlock->numOfPoints;
if (taosWriteMsg(fd, &(points), sizeof(points)) <= 0) return -1;
// write the data
for (int col = 0; col < pObj->numOfColumns; ++col)
if (taosWriteMsg(fd, pBlock->offset[col], pObj->schema[col].bytes * points) <= 0) return -1;
TSKEY lastKey = *((TSKEY *)(pBlock->offset[0] + pObj->schema[0].bytes * (points - 1)));
dTrace("vid:%d sid:%d id:%s, cache block is sent, points:%d lastKey:%" PRId64, vnode, sid, pObj->meterId, points,
lastKey);
blocksSent++;
pointsSent += pBlock->numOfPoints;
if (slot == pInfo->currentSlot) break;
slot = (slot + 1) % pInfo->maxBlocks;
}
// set number of points as zero at the end
points = 0;
if (taosWriteMsg(fd, &(points), sizeof(points)) <= 0) return -1;
}
sid = -1;
if (taosWriteMsg(fd, &sid, sizeof(sid)) < 0) return -1;
if (vnodeSyncRetrieveVnodeStatistic(vnode, fd) < 0) return -1;
return 0;
}
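/*
 * Illustrative sketch (not part of the original source): the cache sync stream produced
 * above is framed per meter as
 *   [sid][lastKey][lastKeyOnFile][commitPoint] { [points][column 0 data]...[column N-1 data] }* [points = 0]
 * and the whole stream ends with sid = -1 followed by the vnode statistics. The hypothetical
 * helper below computes the byte size of one block frame; colBytes[] is an assumed array of
 * per-column value sizes.
 */
static int64_t sketchCacheSyncBlockSize(int32_t points, int numOfColumns, const int *colBytes) {
  int64_t size = sizeof(int32_t);                                                // the points field itself
  for (int c = 0; c < numOfColumns; ++c) size += (int64_t)points * colBytes[c];  // one column region per column
  return size;
}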
int vnodeSyncRestoreCache(int vnode, int fd) {
int32_t sid, points, i, slot;
SMeterObj * pObj;
SCacheInfo * pInfo;
SCacheBlock *pBlock;
int blocksReceived, pointsReceived;
int numOfBlocks;
SVnodeCfg * pCfg = &vnodeList[vnode].cfg;
SCachePool * pPool = (SCachePool *)vnodeList[vnode].pCachePool;
while (1) {
// read sid first
if (taosReadMsg(fd, &sid, sizeof(sid)) <= 0) return -1;
if (sid >= pCfg->maxSessions) {
dError("vid:%d, restore cache, sid:%d is messed up", vnode, sid);
return -1;
}
if (sid < 0) break;
pObj = vnodeList[vnode].meterList[sid];
if (pObj == NULL) {
dError("vid:%d sid:%d, meter is not there", vnode, sid);
vnodeSendMeterCfgMsg(vnode, sid);
return -1;
}
pInfo = (SCacheInfo *)pObj->pCache;
numOfBlocks = pInfo->numOfBlocks;
pthread_mutex_lock(&pPool->vmutex);
for (i = 0; i < numOfBlocks; ++i) {
slot = (pInfo->currentSlot - i + pInfo->maxBlocks) % pInfo->maxBlocks;
pBlock = pInfo->cacheBlocks[slot];
vnodeFreeCacheBlock(pBlock);
}
pthread_mutex_unlock(&pPool->vmutex);
pInfo->unCommittedBlocks = 0;
if (taosReadMsg(fd, &(pObj->lastKey), sizeof(pObj->lastKey)) <= 0) return -1;
if (taosReadMsg(fd, &(pObj->lastKeyOnFile), sizeof(pObj->lastKeyOnFile)) <= 0) return -1;
if (taosReadMsg(fd, &(pInfo->commitPoint), sizeof(pInfo->commitPoint)) <= 0) return -1;
dTrace("vid:%d sid:%d id:%s, commitPoint:%d lastKeyOnFile:%" PRId64, vnode, sid, pObj->meterId, pInfo->commitPoint,
pObj->lastKeyOnFile);
if (vnodeList[pObj->vnode].lastKey < pObj->lastKey) vnodeList[pObj->vnode].lastKey = pObj->lastKey;
if (vnodeList[pObj->vnode].lastKeyOnFile < pObj->lastKeyOnFile)
vnodeList[pObj->vnode].lastKeyOnFile = pObj->lastKeyOnFile;
pInfo->currentSlot = -1;
pInfo->commitSlot = 0;
memset(pInfo->cacheBlocks, 0, sizeof(SCacheBlock *) * pInfo->maxBlocks);
blocksReceived = 0;
pointsReceived = 0;
pObj->freePoints = pObj->pointsPerBlock * pInfo->maxBlocks;
while (1) {
// read number of points;
points = 0;
if (taosReadMsg(fd, &points, sizeof(points)) <= 0) return -1;
if (points == 0) break;
if (vnodeAllocateCacheBlock(pObj) < 0) return -1;
pBlock = pInfo->cacheBlocks[pInfo->currentSlot];
pBlock->numOfPoints = points;
// read the data
for (int col = 0; col < pObj->numOfColumns; ++col)
if (taosReadMsg(fd, pBlock->offset[col], pObj->schema[col].bytes * points) <= 0) return -1;
atomic_fetch_sub_32(&pObj->freePoints, points);
blocksReceived++;
pointsReceived += points;
pObj->lastKey = *((TSKEY *)(pBlock->offset[0] + pObj->schema[0].bytes * (points - 1)));
if (vnodeList[pObj->vnode].lastKey < pObj->lastKey) vnodeList[pObj->vnode].lastKey = pObj->lastKey;
if (vnodeList[pObj->vnode].firstKey > *(TSKEY *)(pBlock->offset[0]))
vnodeList[pObj->vnode].firstKey = *(TSKEY *)(pBlock->offset[0]);
dTrace("vid:%d sid:%d id:%s, cache block is received, points:%d lastKey:%" PRId64, vnode, sid, pObj->meterId, points,
pObj->lastKey);
}
}
if (vnodeSyncRestoreVnodeStatistic(pObj->vnode, fd) < 0) return -1;
return 0;
}
int vnodeIsCacheCommitted(SMeterObj *pObj) {
if (pObj->pCache == NULL) return 1;
SCacheInfo *pInfo = (SCacheInfo *)pObj->pCache;
if (pInfo->currentSlot < 0) return 1;
SCacheBlock *pBlock = pInfo->cacheBlocks[pInfo->currentSlot];
if (pInfo->commitSlot != pInfo->currentSlot) return 0;
if (pInfo->commitPoint != pBlock->numOfPoints) return 0;
return 1;
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _GNU_SOURCE /* See feature_test_macros(7) */
#include "os.h"
#include "taosdef.h"
#include "vnode.h"
#include "vnodeUtil.h"
#include "vnodeStatus.h"
typedef struct {
int sversion;
int sid;
int contLen;
int action:8;
int simpleCheck:24;
} SCommitHead;
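/*
 * Illustrative sketch (not part of the original source): simpleCheck is a 24-bit sum of the
 * other header fields. vnodeWriteToCommitLog() stores it in the header and repeats it after
 * the payload, and vnodeRestoreDataFromLog() recomputes it when replaying the log. The
 * hypothetical helper below shows the computation.
 */
static int sketchSimpleCheck(int sversion, int sid, int contLen, int action) {
  return (sversion + sid + contLen + action) & 0xFFFFFF;  // keep only the low 24 bits
}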
int vnodeOpenCommitLog(int vnode, uint64_t firstV) {
SVnodeObj *pVnode = vnodeList + vnode;
char * fileName = pVnode->logFn;
pVnode->logFd = open(fileName, O_RDWR | O_CREAT, S_IRWXU | S_IRWXG | S_IRWXO);
if (pVnode->logFd < 0) {
dError("vid:%d, failed to open file:%s, reason:%s", vnode, fileName, strerror(errno));
return -1;
}
dTrace("vid:%d, logfd:%d, open file:%s success", vnode, pVnode->logFd, fileName);
if (posix_fallocate64(pVnode->logFd, 0, pVnode->mappingSize) != 0) {
dError("vid:%d, logfd:%d, failed to alloc file size:%d, reason:%s", vnode, pVnode->logFd, pVnode->mappingSize, strerror(errno));
perror("fallocate failed");
goto _err_log_open;
}
struct stat statbuf;
stat(fileName, &statbuf);
int64_t length = statbuf.st_size;
if (length != pVnode->mappingSize) {
dError("vid:%d, logfd:%d, alloc file size:%" PRId64 " not equal to mapping size:%" PRId64, vnode, pVnode->logFd, length,
pVnode->mappingSize);
goto _err_log_open;
}
pVnode->pMem = mmap(0, pVnode->mappingSize, PROT_WRITE | PROT_READ, MAP_SHARED, pVnode->logFd, 0);
if (pVnode->pMem == MAP_FAILED) {
dError("vid:%d, logfd:%d, failed to map file, reason:%s", vnode, pVnode->logFd, strerror(errno));
goto _err_log_open;
}
pVnode->pWrite = pVnode->pMem;
memcpy(pVnode->pWrite, &(firstV), sizeof(firstV));
pVnode->pWrite += sizeof(firstV);
return pVnode->logFd;
_err_log_open:
close(pVnode->logFd);
remove(fileName);
pVnode->logFd = -1;
return -1;
}
int vnodeRenewCommitLog(int vnode) {
SVnodeObj *pVnode = vnodeList + vnode;
char * fileName = pVnode->logFn;
char * oldName = pVnode->logOFn;
pthread_mutex_lock(&(pVnode->logMutex));
if (FD_VALID(pVnode->logFd)) {
munmap(pVnode->pMem, pVnode->mappingSize);
close(pVnode->logFd);
rename(fileName, oldName);
}
if (pVnode->cfg.commitLog) vnodeOpenCommitLog(vnode, vnodeList[vnode].version);
pthread_mutex_unlock(&(pVnode->logMutex));
return pVnode->logFd;
}
void vnodeRemoveCommitLog(int vnode) { remove(vnodeList[vnode].logOFn); }
size_t vnodeRestoreDataFromLog(int vnode, char *fileName, uint64_t *firstV) {
int fd, ret;
char * cont = NULL;
size_t totalLen = 0;
int actions = 0;
SVnodeObj *pVnode = vnodeList + vnode;
if (pVnode->meterList == NULL) {
dError("vid:%d, vnode is not initialized!!!", vnode);
return 0;
}
struct stat fstat;
if (stat(fileName, &fstat) < 0) {
dTrace("vid:%d, no log file:%s", vnode, fileName);
return 0;
}
dTrace("vid:%d, uncommitted data in file:%s, restore them ...", vnode, fileName);
fd = open(fileName, O_RDWR);
if (fd < 0) {
dError("vid:%d, failed to open:%s, reason:%s", vnode, fileName, strerror(errno));
goto _error;
}
ret = read(fd, firstV, sizeof(pVnode->version));
if (ret <= 0) {
dError("vid:%d, failed to read version", vnode);
goto _error;
}
pVnode->version = *firstV;
int32_t bufLen = TSDB_PAYLOAD_SIZE;
cont = calloc(1, bufLen);
if (cont == NULL) {
dError("vid:%d, out of memory", vnode);
goto _error;
}
TSKEY now = taosGetTimestamp(pVnode->cfg.precision);
SCommitHead head;
int simpleCheck = 0;
while (1) {
ret = read(fd, &head, sizeof(head));
if (ret < 0) goto _error;
if (ret == 0) break;
if (((head.sversion+head.sid+head.contLen+head.action) & 0xFFFFFF) != head.simpleCheck) break;
simpleCheck = head.simpleCheck;
// head.contLen validation is removed
if (head.sid >= pVnode->cfg.maxSessions || head.sid < 0 || head.action >= TSDB_ACTION_MAX) {
dError("vid, invalid commit head, sid:%d contLen:%d action:%d", head.sid, head.contLen, head.action);
} else {
if (head.contLen > 0) {
if (bufLen < head.contLen+sizeof(simpleCheck)) { // pre-allocated buffer is not enough
cont = realloc(cont, head.contLen+sizeof(simpleCheck));
bufLen = head.contLen+sizeof(simpleCheck);
}
if (read(fd, cont, head.contLen+sizeof(simpleCheck)) < 0) goto _error;
if (*(int *)(cont+head.contLen) != simpleCheck) break;
SMeterObj *pObj = pVnode->meterList[head.sid];
if (pObj == NULL) {
dError("vid:%d, sid:%d not exists, ignore data in commit log, contLen:%d action:%d",
vnode, head.sid, head.contLen, head.action);
continue;
}
if (vnodeIsMeterState(pObj, TSDB_METER_STATE_DROPPING)) {
dWarn("vid:%d sid:%d id:%s, meter is dropped, ignore data in commit log, contLen:%d action:%d",
vnode, head.sid, head.contLen, head.action);
continue;
}
int32_t numOfPoints = 0;
(*vnodeProcessAction[head.action])(pObj, cont, head.contLen, TSDB_DATA_SOURCE_LOG, NULL, head.sversion,
&numOfPoints, now);
actions++;
} else {
break;
}
}
totalLen += sizeof(head) + head.contLen + sizeof(simpleCheck);
}
tclose(fd);
tfree(cont);
dTrace("vid:%d, %d pieces of uncommitted data are restored", vnode, actions);
return totalLen;
_error:
tclose(fd);
tfree(cont);
dError("vid:%d, failed to restore %s, remove this node...", vnode, fileName);
  // rename to an error file for future processing
char *f = NULL;
taosFileRename(fileName, "error", '/', &f);
free(f);
return -1;
}
int vnodeInitCommit(int vnode) {
size_t size = 0;
uint64_t firstV = 0;
SVnodeObj *pVnode = vnodeList + vnode;
pthread_mutex_init(&(pVnode->logMutex), NULL);
sprintf(pVnode->logFn, "%s/vnode%d/db/submit%d.log", tsDirectory, vnode, vnode);
sprintf(pVnode->logOFn, "%s/vnode%d/db/submit%d.olog", tsDirectory, vnode, vnode);
pVnode->mappingSize = ((int64_t)pVnode->cfg.cacheBlockSize) * pVnode->cfg.cacheNumOfBlocks.totalBlocks * 1.5;
pVnode->mappingThreshold = pVnode->mappingSize * 0.7;
// restore from .olog file and commit to file
size = vnodeRestoreDataFromLog(vnode, pVnode->logOFn, &firstV);
  if (size == (size_t)-1) return -1;  // vnodeRestoreDataFromLog returns (size_t)-1 on error
if (size > 0) {
if (pVnode->commitInProcess == 0) vnodeCommitToFile(pVnode);
remove(pVnode->logOFn);
}
// restore from .log file to cache
size = vnodeRestoreDataFromLog(vnode, pVnode->logFn, &firstV);
  if (size == (size_t)-1) return -1;  // vnodeRestoreDataFromLog returns (size_t)-1 on error
if (pVnode->cfg.commitLog == 0) return 0;
if (size == 0) firstV = pVnode->version;
if (vnodeOpenCommitLog(vnode, firstV) < 0) {
dError("vid:%d, commit log init failed", vnode);
return -1;
}
pVnode->pWrite += size;
dPrint("vid:%d, commit log is initialized", vnode);
return 0;
}
void vnodeCleanUpCommit(int vnode) {
SVnodeObj *pVnode = vnodeList + vnode;
if (FD_VALID(pVnode->logFd)) close(pVnode->logFd);
if (pVnode->cfg.commitLog && (pVnode->logFd > 0 && remove(pVnode->logFn) < 0)) {
dError("vid:%d, failed to remove:%s", vnode, pVnode->logFn);
taosLogError("vid:%d, failed to remove:%s", vnode, pVnode->logFn);
}
pthread_mutex_destroy(&(pVnode->logMutex));
}
int vnodeWriteToCommitLog(SMeterObj *pObj, char action, char *cont, int contLen, int sversion) {
SVnodeObj *pVnode = vnodeList + pObj->vnode;
if (pVnode->pWrite == NULL) return 0;
SCommitHead head;
head.sid = pObj->sid;
head.action = action;
head.sversion = pObj->sversion;
head.contLen = contLen;
head.simpleCheck = (head.sversion+head.sid+head.contLen+head.action) & 0xFFFFFF;
int simpleCheck = head.simpleCheck;
pthread_mutex_lock(&(pVnode->logMutex));
// 100 bytes redundant mem space
if (pVnode->mappingSize - (pVnode->pWrite - pVnode->pMem) < contLen + sizeof(SCommitHead) + sizeof(simpleCheck) + 100) {
pthread_mutex_unlock(&(pVnode->logMutex));
dTrace("vid:%d, mem mapping space is not enough, wait for commit", pObj->vnode);
vnodeProcessCommitTimer(pVnode, NULL);
return TSDB_CODE_ACTION_IN_PROGRESS;
}
char *pWrite = pVnode->pWrite;
pVnode->pWrite += sizeof(head) + contLen + sizeof(simpleCheck);
memcpy(pWrite, (char *)&head, sizeof(head));
memcpy(pWrite + sizeof(head), cont, contLen);
memcpy(pWrite + sizeof(head) + contLen, &simpleCheck, sizeof(simpleCheck));
pthread_mutex_unlock(&(pVnode->logMutex));
if (pVnode->pWrite - pVnode->pMem > pVnode->mappingThreshold) {
dTrace("vid:%d, mem mapping is close to limit, commit", pObj->vnode);
vnodeProcessCommitTimer(pVnode, NULL);
}
dTrace("vid:%d sid:%d, data is written to commit log", pObj->vnode, pObj->sid);
return 0;
}
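/*
 * Illustrative sketch (not part of the original source): each record appended to the
 * memory-mapped commit log occupies the header, the payload, and a trailing int that
 * repeats simpleCheck; the writer above also keeps a 100-byte safety margin before the
 * end of the mapping. The hypothetical helper below computes the space one record needs.
 */
static size_t sketchCommitRecordSize(int contLen) {
  return sizeof(SCommitHead) + (size_t)contLen + sizeof(int);  // header + payload + trailing check
}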
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "tscompression.h"
#include "tutil.h"
#include "vnode.h"
#include "vnodeFile.h"
#include "vnodeUtil.h"
#include "vnodeStatus.h"
#define FILE_QUERY_NEW_BLOCK -5 // a special negative number
const int16_t vnodeFileVersion = 0;
int (*pCompFunc[])(const char *const input, int inputSize, const int elements, char *const output, int outputSize,
char algorithm, char *const buffer, int bufferSize) = {NULL,
tsCompressBool,
tsCompressTinyint,
tsCompressSmallint,
tsCompressInt,
tsCompressBigint,
tsCompressFloat,
tsCompressDouble,
tsCompressString,
tsCompressTimestamp,
tsCompressString};
int (*pDecompFunc[])(const char *const input, int compressedSize, const int elements, char *const output,
int outputSize, char algorithm, char *const buffer, int bufferSize) = {NULL,
tsDecompressBool,
tsDecompressTinyint,
tsDecompressSmallint,
tsDecompressInt,
tsDecompressBigint,
tsDecompressFloat,
tsDecompressDouble,
tsDecompressString,
tsDecompressTimestamp,
tsDecompressString};
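// Both tables are indexed by column data type: slot 0 is unused and the two string-typed entries
// share the same string codec (tsCompressString / tsDecompressString).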
int vnodeUpdateFileMagic(int vnode, int fileId);
int vnodeRecoverCompHeader(int vnode, int fileId);
int vnodeRecoverHeadFile(int vnode, int fileId);
int vnodeRecoverDataFile(int vnode, int fileId);
int vnodeForwardStartPosition(SQuery *pQuery, SCompBlock *pBlock, int32_t slotIdx, SVnodeObj *pVnode, SMeterObj *pObj);
int vnodeCheckNewHeaderFile(int fd, SVnodeObj *pVnode);
char* vnodeGetDataDir(int vnode, int fileId);
char* vnodeGetDiskFromHeadFile(char *headName);
void vnodeAdustVnodeFile(SVnodeObj *pVnode);
int vnodeSyncRetrieveFile(int vnode, int fd, uint32_t peerFid, uint64_t *fmagic);
int vnodeSyncRestoreFile(int vnode, int sfd);
void vnodeAdjustFileTier(int vnode);
void vnodeGetHeadDataLname(char *headName, char *dataName, char *lastName, int vnode, int fileId) {
if (headName != NULL) sprintf(headName, "%s/vnode%d/db/v%df%d.head", tsDirectory, vnode, vnode, fileId);
if (dataName != NULL) sprintf(dataName, "%s/vnode%d/db/v%df%d.data", tsDirectory, vnode, vnode, fileId);
if (lastName != NULL) sprintf(lastName, "%s/vnode%d/db/v%df%d.last", tsDirectory, vnode, vnode, fileId);
}
void vnodeGetHeadDataDname(char *dHeadName, char *dDataName, char *dLastName, int vnode, int fileId, char *path) {
if (dHeadName != NULL) sprintf(dHeadName, "%s/data/vnode%d/v%df%d.head0", path, vnode, vnode, fileId);
if (dDataName != NULL) sprintf(dDataName, "%s/data/vnode%d/v%df%d.data", path, vnode, vnode, fileId);
if (dLastName != NULL) sprintf(dLastName, "%s/data/vnode%d/v%df%d.last0", path, vnode, vnode, fileId);
}
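// The names under <tsDirectory>/vnode<v>/db/ are symlinks ("lname") pointing at the real files
// ("dname") under <path>/data/vnode<v>/. Head and last files carry a trailing 0/1 suffix that is
// toggled on each commit (see vnodeOpenCommitFiles).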
void vnodeGetDnameFromLname(char *lhead, char *ldata, char *llast, char *dhead, char *ddata, char *dlast) {
if (lhead != NULL) {
assert(dhead != NULL);
readlink(lhead, dhead, TSDB_FILENAME_LEN);
}
if (ldata != NULL) {
assert(ddata != NULL);
readlink(ldata, ddata, TSDB_FILENAME_LEN);
}
if (llast != NULL) {
assert(dlast != NULL);
readlink(llast, dlast, TSDB_FILENAME_LEN);
}
}
void vnodeGetHeadTname(char *nHeadName, char *nLastName, int vnode, int fileId) {
if (nHeadName != NULL) sprintf(nHeadName, "%s/vnode%d/db/v%df%d.t", tsDirectory, vnode, vnode, fileId);
if (nLastName != NULL) sprintf(nLastName, "%s/vnode%d/db/v%df%d.l", tsDirectory, vnode, vnode, fileId);
}
void vnodeCreateDataDirIfNeeded(int vnode, char *path) {
char directory[TSDB_FILENAME_LEN] = "\0";
sprintf(directory, "%s/data/vnode%d", path, vnode);
if (access(directory, F_OK) != 0) mkdir(directory, 0755);
}
int vnodeCreateHeadDataFile(int vnode, int fileId, char *headName, char *dataName, char *lastName) {
char dHeadName[TSDB_FILENAME_LEN];
char dDataName[TSDB_FILENAME_LEN];
char dLastName[TSDB_FILENAME_LEN];
char *path = vnodeGetDataDir(vnode, fileId);
if (path == NULL) {
dError("vid:%d, fileId:%d, failed to get dataDir", vnode, fileId);
return -1;
}
vnodeCreateDataDirIfNeeded(vnode, path);
vnodeGetHeadDataLname(headName, dataName, lastName, vnode, fileId);
vnodeGetHeadDataDname(dHeadName, dDataName, dLastName, vnode, fileId, path);
if (symlink(dHeadName, headName) != 0) return -1;
if (symlink(dDataName, dataName) != 0) return -1;
if (symlink(dLastName, lastName) != 0) return -1;
dPrint("vid:%d, fileId:%d, empty header file:%s file:%s lastFile:%s on disk:%s is created ",
vnode, fileId, headName, dataName, lastName, path);
return 0;
}
int vnodeCreateEmptyCompFile(int vnode, int fileId) {
char headName[TSDB_FILENAME_LEN];
char dataName[TSDB_FILENAME_LEN];
char lastName[TSDB_FILENAME_LEN];
int tfd;
char *temp;
if (vnodeCreateHeadDataFile(vnode, fileId, headName, dataName, lastName) < 0) {
dError("failed to create head data file, vnode: %d, fileId: %d", vnode, fileId);
return -1;
}
tfd = open(headName, O_WRONLY | O_CREAT | O_TRUNC, S_IRWXU | S_IRWXG | S_IRWXO);
if (tfd < 0) {
dError("failed to create head file:%s, reason:%s", headName, strerror(errno));
return -1;
}
vnodeCreateFileHeaderFd(tfd);
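// An empty head file is a fixed-length file header followed by one zeroed SCompHeader slot per
// possible table (maxSessions) plus a trailing checksum; zero means "no comp info for this table".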
int size = sizeof(SCompHeader) * vnodeList[vnode].cfg.maxSessions + sizeof(TSCKSUM);
temp = calloc(1, size);
if (temp == NULL) {
dError("vid:%d, fileId:%d, failed to allocate comp header buffer", vnode, fileId);
close(tfd);
return -1;
}
taosCalcChecksumAppend(0, (uint8_t *)temp, size);
lseek(tfd, TSDB_FILE_HEADER_LEN, SEEK_SET);
twrite(tfd, temp, size);
free(temp);
close(tfd);
tfd = open(dataName, O_WRONLY | O_CREAT | O_TRUNC, S_IRWXU | S_IRWXG | S_IRWXO);
if (tfd < 0) {
dError("failed to create data file:%s, reason:%s", dataName, strerror(errno));
return -1;
}
vnodeCreateFileHeaderFd(tfd);
close(tfd);
tfd = open(lastName, O_WRONLY | O_CREAT | O_TRUNC, S_IRWXU | S_IRWXG | S_IRWXO);
if (tfd < 0) {
dError("failed to create last file:%s, reason:%s", lastName, strerror(errno));
return -1;
}
vnodeCreateFileHeaderFd(tfd);
close(tfd);
return 0;
}
int vnodeCreateNeccessaryFiles(SVnodeObj *pVnode) {
int numOfFiles = 0, fileId, filesAdded = 0;
int vnode = pVnode->vnode;
SVnodeCfg *pCfg = &(pVnode->cfg);
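// Data files are partitioned by time: each file covers daysPerFile days, fileId is the index of
// that time range, and lastKeyOnFile is the last timestamp the current file can hold.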
if (pVnode->lastKeyOnFile == 0) {
if (pCfg->daysPerFile == 0) pCfg->daysPerFile = 10;
pVnode->fileId = pVnode->firstKey / tsMsPerDay[(uint8_t)pVnode->cfg.precision] / pCfg->daysPerFile;
pVnode->lastKeyOnFile = (int64_t)(pVnode->fileId + 1) * pCfg->daysPerFile * tsMsPerDay[(uint8_t)pVnode->cfg.precision] - 1;
pVnode->numOfFiles = 1;
if (vnodeCreateEmptyCompFile(vnode, pVnode->fileId) < 0) return -1;
}
numOfFiles = (pVnode->lastKeyOnFile - pVnode->commitFirstKey) / tsMsPerDay[(uint8_t)pVnode->cfg.precision] / pCfg->daysPerFile;
if (pVnode->commitFirstKey > pVnode->lastKeyOnFile) numOfFiles = -1;
dTrace("vid:%d, commitFirstKey:%" PRId64 " lastKeyOnFile:%" PRId64 " numOfFiles:%d fileId:%d vnodeNumOfFiles:%d", pVnode->vnode,
pVnode->commitFirstKey, pVnode->lastKeyOnFile, numOfFiles, pVnode->fileId, pVnode->numOfFiles);
if (numOfFiles >= pVnode->numOfFiles) {
// create empty header files backward
filesAdded = numOfFiles - pVnode->numOfFiles + 1;
assert(filesAdded <= pVnode->maxFiles + 2);
for (int i = 0; i < filesAdded; ++i) {
fileId = pVnode->fileId - pVnode->numOfFiles - i;
if (vnodeCreateEmptyCompFile(vnode, fileId) < 0)
#ifdef CLUSTER
return vnodeRecoverFromPeer(pVnode, fileId);
#else
return -1;
#endif
}
} else if (numOfFiles < 0) {
// create empty header files forward
pVnode->fileId++;
if (vnodeCreateEmptyCompFile(vnode, pVnode->fileId) < 0)
#ifdef CLUSTER
return vnodeRecoverFromPeer(pVnode, pVnode->fileId);
#else
return -1;
#endif
pVnode->lastKeyOnFile += (int64_t)tsMsPerDay[(uint8_t)pVnode->cfg.precision] * pCfg->daysPerFile;
filesAdded = 1;
numOfFiles = 0; // hack: commit into the newly created latest file
}
fileId = pVnode->fileId - numOfFiles;
pVnode->commitLastKey =
pVnode->lastKeyOnFile - (int64_t)numOfFiles * tsMsPerDay[(uint8_t)pVnode->cfg.precision] * pCfg->daysPerFile;
pVnode->commitFirstKey = pVnode->commitLastKey - (int64_t)tsMsPerDay[(uint8_t)pVnode->cfg.precision] * pCfg->daysPerFile + 1;
pVnode->commitFileId = fileId;
pVnode->numOfFiles = pVnode->numOfFiles + filesAdded;
return 0;
}
int vnodeOpenCommitFiles(SVnodeObj *pVnode, int noTempLast) {
char name[TSDB_FILENAME_LEN];
char dHeadName[TSDB_FILENAME_LEN] = "\0";
char dLastName[TSDB_FILENAME_LEN] = "\0";
int len = 0;
struct stat filestat;
int vnode = pVnode->vnode;
int fileId;
if (vnodeCreateNeccessaryFiles(pVnode) < 0) return -1;
fileId = pVnode->commitFileId;
dTrace("vid:%d, commit fileId:%d, commitLastKey:%" PRId64 ", vnodeLastKey:%" PRId64 ", lastKeyOnFile:%" PRId64 " numOfFiles:%d",
vnode, fileId, pVnode->commitLastKey, pVnode->lastKey, pVnode->lastKeyOnFile, pVnode->numOfFiles);
int minSize = sizeof(SCompHeader) * pVnode->cfg.maxSessions + sizeof(TSCKSUM) + TSDB_FILE_HEADER_LEN;
vnodeGetHeadDataLname(pVnode->cfn, name, pVnode->lfn, vnode, fileId);
readlink(pVnode->cfn, dHeadName, TSDB_FILENAME_LEN);
readlink(pVnode->lfn, dLastName, TSDB_FILENAME_LEN);
len = strlen(dHeadName);
if (dHeadName[len - 1] == 'd') {
dHeadName[len] = '0';
dHeadName[len + 1] = '\0';
} else {
dHeadName[len - 1] = '0' + (dHeadName[len - 1] + 1 - '0') % 2;
}
len = strlen(dLastName);
if (dLastName[len - 1] == 't') {
dLastName[len] = '0';
dLastName[len + 1] = '\0';
} else {
dLastName[len - 1] = '0' + (dLastName[len - 1] + 1 - '0') % 2;
}
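// Commit writes into the "other" real file: the trailing character of the resolved head/last name
// is toggled between 0 and 1, new .t/.l links point at it, and vnodeCloseCommitFiles renames the
// new links over the old ones once the commit succeeds.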
vnodeGetHeadTname(pVnode->nfn, pVnode->tfn, vnode, fileId);
symlink(dHeadName, pVnode->nfn);
if (!noTempLast) symlink(dLastName, pVnode->tfn);
// open head file
pVnode->hfd = open(pVnode->cfn, O_RDONLY);
if (pVnode->hfd < 0) {
dError("vid:%d, failed to open head file:%s, reason:%s", vnode, pVnode->cfn, strerror(errno));
taosLogError("vid:%d, failed to open head file:%s, reason:%s", vnode, pVnode->cfn, strerror(errno));
vnodeRecoverFromPeer(pVnode, fileId);
goto _error;
}
// verify head file, check size
fstat(pVnode->hfd, &filestat);
if (filestat.st_size < minSize) {
dError("vid:%d, head file:%s corrupted", vnode, pVnode->cfn);
taosLogError("vid:%d, head file:%s corrupted", vnode, pVnode->cfn);
vnodeRecoverFromPeer(pVnode, fileId);
goto _error;
}
// open a new header file
pVnode->nfd = open(pVnode->nfn, O_RDWR | O_CREAT | O_TRUNC, S_IRWXU | S_IRWXG | S_IRWXO);
if (pVnode->nfd < 0) {
dError("vid:%d, failed to open new head file:%s, reason:%s", vnode, pVnode->nfn, strerror(errno));
taosLogError("vid:%d, failed to open new head file:%s, reason:%s", vnode, pVnode->nfn, strerror(errno));
goto _error;
}
vnodeCreateFileHeaderFd(pVnode->nfd);
// open existing data file
pVnode->dfd = open(name, O_WRONLY | O_CREAT, S_IRWXU | S_IRWXG | S_IRWXO);
if (pVnode->dfd < 0) {
dError("vid:%d, failed to open data file:%s, reason:%s", vnode, name, strerror(errno));
taosLogError("vid:%d, failed to open data file:%s, reason:%s", vnode, name, strerror(errno));
vnodeRecoverFromPeer(pVnode, fileId);
goto _error;
}
// verify data file, check size
fstat(pVnode->dfd, &filestat);
if (filestat.st_size < TSDB_FILE_HEADER_LEN) {
dError("vid:%d, data file:%s corrupted", vnode, name);
taosLogError("vid:%d, data file:%s corrupted", vnode, name);
vnodeRecoverFromPeer(pVnode, fileId);
goto _error;
} else {
dPrint("vid:%d, data file:%s is opened to write", vnode, name);
}
// open last file
pVnode->lfd = open(pVnode->lfn, O_RDWR);
if (pVnode->lfd < 0) {
dError("vid:%d, failed to open last file:%s, reason:%s", vnode, pVnode->lfn, strerror(errno));
taosLogError("vid:%d, failed to open last file:%s, reason:%s", vnode, pVnode->lfn, strerror(errno));
vnodeRecoverFromPeer(pVnode, fileId);
goto _error;
}
// verify last file, check size
fstat(pVnode->lfd, &filestat);
if (filestat.st_size < TSDB_FILE_HEADER_LEN) {
dError("vid:%d, last file:%s corrupted", vnode, pVnode->lfn);
taosLogError("vid:%d, last file:%s corrupted", vnode, pVnode->lfn);
vnodeRecoverFromPeer(pVnode, fileId);
goto _error;
}
// open a new last file
if (noTempLast) {
pVnode->tfd = -1; // do not open temporary last file
} else {
pVnode->tfd = open(pVnode->tfn, O_RDWR | O_CREAT | O_TRUNC, S_IRWXU | S_IRWXG | S_IRWXO);
if (pVnode->tfd < 0) {
dError("vid:%d, failed to open new last file:%s, reason:%s", vnode, pVnode->tfn, strerror(errno));
taosLogError("vid:%d, failed to open new last file:%s, reason:%s", vnode, pVnode->tfn, strerror(errno));
goto _error;
}
vnodeCreateFileHeaderFd(pVnode->tfd);
pVnode->lfSize = lseek(pVnode->tfd, 0, SEEK_END);
}
int size = sizeof(SCompHeader) * pVnode->cfg.maxSessions + sizeof(TSCKSUM);
char *temp = malloc(size);
if (NULL == temp) {
dError("vid:%d, malloc failed", vnode);
taosLogError("vid:%d, malloc failed", vnode);
//vnodeRecoverFromPeer(pVnode, fileId);
goto _error;
}
memset(temp, 0, size);
taosCalcChecksumAppend(0, (uint8_t *)temp, size);
twrite(pVnode->nfd, temp, size);
free(temp);
pVnode->dfSize = lseek(pVnode->dfd, 0, SEEK_END);
return 0;
_error:
if (pVnode->dfd > 0) close(pVnode->dfd);
pVnode->dfd = 0;
if (pVnode->hfd > 0) close(pVnode->hfd);
pVnode->hfd = 0;
if (pVnode->nfd > 0) close(pVnode->nfd);
pVnode->nfd = 0;
if (pVnode->lfd > 0) close(pVnode->lfd);
pVnode->lfd = 0;
if (pVnode->tfd > 0) close(pVnode->tfd);
pVnode->tfd = 0;
return -1;
}
void vnodeRemoveFile(int vnode, int fileId) {
char headName[TSDB_FILENAME_LEN] = "\0";
char dataName[TSDB_FILENAME_LEN] = "\0";
char lastName[TSDB_FILENAME_LEN] = "\0";
char dHeadName[TSDB_FILENAME_LEN] = "\0";
char dDataName[TSDB_FILENAME_LEN] = "\0";
char dLastName[TSDB_FILENAME_LEN] = "\0";
SVnodeObj * pVnode = NULL;
SVnodeHeadInfo headInfo;
pVnode = vnodeList + vnode;
vnodeGetHeadDataLname(headName, dataName, lastName, vnode, fileId);
char *path = vnodeGetDiskFromHeadFile(headName);
if (path == NULL) {
return ;
}
vnodeGetDnameFromLname(headName, dataName, lastName, dHeadName, dDataName, dLastName);
int fd = open(headName, O_RDWR | O_CREAT, S_IRWXU | S_IRWXG | S_IRWXO);
if (fd > 0) {
vnodeGetHeadFileHeaderInfo(fd, &headInfo);
atomic_fetch_add_64(&(pVnode->vnodeStatistic.totalStorage), -headInfo.totalStorage);
close(fd);
}
remove(headName);
remove(dataName);
remove(lastName);
remove(dHeadName);
remove(dDataName);
remove(dLastName);
dPrint("vid:%d fileId:%d on disk: %s is removed, numOfFiles:%d maxFiles:%d", vnode, fileId, path,
pVnode->numOfFiles, pVnode->maxFiles);
}
void vnodeCloseCommitFiles(SVnodeObj *pVnode) {
char dpath[TSDB_FILENAME_LEN] = "\0";
int ret;
// Check if the new header file is correct
if (tsCheckHeaderFile != 0) {
assert(vnodeCheckNewHeaderFile(pVnode->nfd, pVnode) == 0);
}
close(pVnode->nfd);
pVnode->nfd = 0;
close(pVnode->hfd);
pVnode->hfd = 0;
close(pVnode->dfd);
pVnode->dfd = 0;
close(pVnode->lfd);
pVnode->lfd = 0;
if (pVnode->tfd > 0) close(pVnode->tfd);
pthread_mutex_lock(&(pVnode->vmutex));
readlink(pVnode->cfn, dpath, TSDB_FILENAME_LEN);
ret = rename(pVnode->nfn, pVnode->cfn);
if (ret < 0) {
dError("vid:%d, failed to rename:%s, reason:%s", pVnode->vnode, pVnode->nfn, strerror(errno));
}
remove(dpath);
if (pVnode->tfd > 0) {
memset(dpath, 0, TSDB_FILENAME_LEN);
readlink(pVnode->lfn, dpath, TSDB_FILENAME_LEN);
ret = rename(pVnode->tfn, pVnode->lfn);
if (ret < 0) {
dError("vid:%d, failed to rename:%s, reason:%s", pVnode->vnode, pVnode->tfn, strerror(errno));
}
remove(dpath);
}
pthread_mutex_unlock(&(pVnode->vmutex));
pVnode->tfd = 0;
dTrace("vid:%d, %s and %s is saved", pVnode->vnode, pVnode->cfn, pVnode->lfn);
vnodeAdustVnodeFile(pVnode);
vnodeSaveAllMeterObjToFile(pVnode->vnode);
return;
}
void vnodeBroadcastStatusToUnsyncedPeer(SVnodeObj *pVnode);
void *vnodeCommitMultiToFile(SVnodeObj *pVnode, int ssid, int esid) {
int vnode = pVnode->vnode;
SData * data[TSDB_MAX_COLUMNS], *cdata[TSDB_MAX_COLUMNS]; // first 4 bytes are length
char * buffer = NULL, *dmem = NULL, *cmem = NULL, *hmem = NULL, *tmem = NULL;
SMeterObj * pObj = NULL;
SCompInfo compInfo = {0};
SCompHeader * pHeader;
SMeterInfo * meterInfo = NULL, *pTable = NULL;
SQuery query;
SColumnInfoEx colList[TSDB_MAX_COLUMNS] = {0};
SSqlFunctionExpr pExprs[TSDB_MAX_COLUMNS] = {0};
int commitAgain;
int headLen, sid, col;
int64_t pointsRead;
int64_t pointsReadLast;
SCompBlock * pCompBlock = NULL;
SVnodeCfg * pCfg = &pVnode->cfg;
TSCKSUM chksum;
SVnodeHeadInfo headInfo;
uint8_t * pOldCompBlocks;
dPrint("vid:%d, committing to file, firstKey:%" PRId64 " lastKey:%" PRId64 " ssid:%d esid:%d", vnode, pVnode->firstKey,
pVnode->lastKey, ssid, esid);
if (pVnode->lastKey == 0) goto _over;
vnodeCloseAllSyncFds(vnode);
vnodeRenewCommitLog(vnode);
// find the largest and smallest bytes-per-point among tables in [ssid, esid] to size the commit buffers
int32_t maxBytesPerPoint = 0;
int32_t minBytesPerPoint = INT32_MAX;
for (sid = ssid; sid <= esid; ++sid) {
pObj = (SMeterObj *)(pVnode->meterList[sid]);
if ((pObj == NULL) || (pObj->pCache == NULL)) continue;
if (maxBytesPerPoint < pObj->bytesPerPoint) {
maxBytesPerPoint = pObj->bytesPerPoint;
}
if (minBytesPerPoint > pObj->bytesPerPoint) {
minBytesPerPoint = pObj->bytesPerPoint;
}
}
// buffer to hold the temp head
int tcachblocks = pCfg->cacheBlockSize / (minBytesPerPoint * pCfg->rowsInFileBlock);
int hmsize =
(pCfg->cacheNumOfBlocks.totalBlocks * (MAX(tcachblocks, 1) + 1) + pCfg->maxSessions) * sizeof(SCompBlock);
// buffer to hold the uncompressed data
int dmsize =
maxBytesPerPoint * pCfg->rowsInFileBlock + (sizeof(SData) + EXTRA_BYTES + sizeof(TSCKSUM)) * TSDB_MAX_COLUMNS;
// buffer to hold the compressed data
int cmsize =
maxBytesPerPoint * pCfg->rowsInFileBlock + (sizeof(SData) + EXTRA_BYTES + sizeof(TSCKSUM)) * TSDB_MAX_COLUMNS;
// buffer to hold compHeader
int tmsize = sizeof(SCompHeader) * pCfg->maxSessions + sizeof(TSCKSUM);
// buffer to hold meterInfo
int misize = pVnode->cfg.maxSessions * sizeof(SMeterInfo);
int totalSize = hmsize + dmsize + cmsize + misize + tmsize;
buffer = malloc(totalSize);
if (buffer == NULL) {
dError("no enough memory for committing buffer");
return NULL;
}
hmem = buffer;
dmem = hmem + hmsize;
cmem = dmem + dmsize;
tmem = cmem + cmsize;
meterInfo = (SMeterInfo *)(tmem + tmsize);
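// The single allocation is partitioned as:
//   hmem      - temporary SCompBlock entries produced by this commit
//   dmem      - uncompressed column data for one file block
//   cmem      - compressed column data for one file block
//   tmem      - the SCompHeader table (plus checksum) of the head file
//   meterInfo - per-table commit state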
pthread_mutex_lock(&(pVnode->vmutex));
pVnode->commitFirstKey = pVnode->firstKey;
pVnode->firstKey = pVnode->lastKey + 1;
pthread_mutex_unlock(&(pVnode->vmutex));
_again:
pVnode->commitInProcess = 1;
commitAgain = 0;
memset(hmem, 0, totalSize);
memset(&query, 0, sizeof(query));
if (vnodeOpenCommitFiles(pVnode, ssid) < 0) goto _over;
dTrace("vid:%d, start to commit, commitFirstKey:%" PRId64 " commitLastKey:%" PRId64, vnode, pVnode->commitFirstKey,
pVnode->commitLastKey);
headLen = 0;
vnodeGetHeadFileHeaderInfo(pVnode->hfd, &headInfo);
int maxOldBlocks = 1;
// read head info
if (pVnode->hfd) {
lseek(pVnode->hfd, TSDB_FILE_HEADER_LEN, SEEK_SET);
if (read(pVnode->hfd, tmem, tmsize) <= 0) {
dError("vid:%d, failed to read old header file:%s", vnode, pVnode->cfn);
taosLogError("vid:%d, failed to read old header file:%s", vnode, pVnode->cfn);
vnodeRecoverFromPeer(pVnode, pVnode->commitFileId);
goto _over;
} else {
if (!taosCheckChecksumWhole((uint8_t *)tmem, tmsize)) {
dError("vid:%d, failed to read old header file:%s since comp header offset is broken", vnode, pVnode->cfn);
taosLogError("vid:%d, failed to read old header file:%s since comp header offset is broken",
vnode, pVnode->cfn);
vnodeRecoverFromPeer(pVnode, pVnode->commitFileId);
goto _over;
}
}
}
// read compInfo
for (sid = 0; sid < pCfg->maxSessions; ++sid) {
if (pVnode->meterList == NULL) { // vnode is being freed, abort
goto _over;
}
pObj = (SMeterObj *)(pVnode->meterList[sid]);
if (pObj == NULL) {
continue;
}
// meter is going to be deleted, abort
if (vnodeIsMeterState(pObj, TSDB_METER_STATE_DROPPING)) {
dWarn("vid:%d sid:%d is dropped, ignore this meter", vnode, sid);
continue;
}
pTable = meterInfo + sid;
pHeader = ((SCompHeader *)tmem) + sid;
if (pVnode->hfd > 0) {
if (pHeader->compInfoOffset > 0) {
lseek(pVnode->hfd, pHeader->compInfoOffset, SEEK_SET);
if (read(pVnode->hfd, &compInfo, sizeof(compInfo)) == sizeof(compInfo)) {
if (!taosCheckChecksumWhole((uint8_t *)(&compInfo), sizeof(SCompInfo))) {
dError("vid:%d sid:%d id:%s, failed to read compinfo in file:%s since checksum mismatch",
vnode, sid, pObj->meterId, pVnode->cfn);
taosLogError("vid:%d sid:%d id:%s, failed to read compinfo in file:%s since checksum mismatch",
vnode, sid, pObj->meterId, pVnode->cfn);
vnodeRecoverFromPeer(pVnode, pVnode->commitFileId);
goto _over;
} else {
if (pObj->uid == compInfo.uid) {
pTable->oldNumOfBlocks = compInfo.numOfBlocks;
pTable->oldCompBlockOffset = pHeader->compInfoOffset + sizeof(SCompInfo);
pTable->last = compInfo.last;
if (compInfo.numOfBlocks > maxOldBlocks) maxOldBlocks = compInfo.numOfBlocks;
if (pTable->last) {
lseek(pVnode->hfd, sizeof(SCompBlock) * (compInfo.numOfBlocks - 1), SEEK_CUR);
read(pVnode->hfd, &pTable->lastBlock, sizeof(SCompBlock));
}
} else {
dTrace("vid:%d sid:%d id:%s, uid:%" PRIu64 " is not matched with old:%" PRIu64 ", old data will be thrown away",
vnode, sid, pObj->meterId, pObj->uid, compInfo.uid);
pTable->oldNumOfBlocks = 0;
}
}
} else {
dError("vid:%d sid:%d id:%s, failed to read compinfo in file:%s", vnode, sid, pObj->meterId, pVnode->cfn);
vnodeRecoverFromPeer(pVnode, pVnode->commitFileId);
goto _over;
}
}
}
}
// loop over tables in [ssid, esid] and write their cached data to the commit file
for (sid = ssid; sid <= esid; ++sid) {
pObj = (SMeterObj *)(pVnode->meterList[sid]);
if ((pObj == NULL) || (pObj->pCache == NULL)) continue;
data[0] = (SData *)dmem;
cdata[0] = (SData *)cmem;
for (col = 1; col < pObj->numOfColumns; ++col) {
data[col] = (SData *)(((char *)data[col - 1]) + sizeof(SData) +
pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes + EXTRA_BYTES + sizeof(TSCKSUM));
cdata[col] = (SData *)(((char *)cdata[col - 1]) + sizeof(SData) +
pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes + EXTRA_BYTES + sizeof(TSCKSUM));
}
pTable = meterInfo + sid;
pTable->tempHeadOffset = headLen;
memset(&query, 0, sizeof(query));
query.colList = colList;
query.pSelectExpr = pExprs;
query.ekey = pVnode->commitLastKey;
query.skey = pVnode->commitFirstKey;
query.lastKey = query.skey;
query.sdata = data;
vnodeSetCommitQuery(pObj, &query);
dTrace("vid:%d sid:%d id:%s, start to commit, startKey:%" PRId64 " slot:%d pos:%d", pObj->vnode, pObj->sid, pObj->meterId,
pObj->lastKeyOnFile, query.slot, query.pos);
pointsRead = 0;
pointsReadLast = 0;
// last block is at last file
if (pTable->last) {
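// If the schema changed (sversion mismatch) or the commit query is already over, the old last
// block is written out as-is; otherwise it is read into memory so it can be merged with the
// newly cached rows.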
if ((pTable->lastBlock.sversion != pObj->sversion) || (query.over)) {
// TODO: verify the correctness of this code; write the last block to the
// .data file
pCompBlock = (SCompBlock *)(hmem + headLen);
assert(dmem - (char *)pCompBlock >= sizeof(SCompBlock));
*pCompBlock = pTable->lastBlock;
if (pTable->lastBlock.sversion != pObj->sversion) {
pCompBlock->last = 0;
pCompBlock->offset = lseek(pVnode->dfd, 0, SEEK_END);
pTable->last = 0;
lseek(pVnode->lfd, pTable->lastBlock.offset, SEEK_SET);
tsendfile(pVnode->dfd, pVnode->lfd, NULL, pTable->lastBlock.len);
pVnode->dfSize = pCompBlock->offset + pTable->lastBlock.len;
} else {
if (ssid == 0) {
assert(pCompBlock->last && pVnode->tfd != -1);
pCompBlock->offset = lseek(pVnode->tfd, 0, SEEK_END);
lseek(pVnode->lfd, pTable->lastBlock.offset, SEEK_SET);
tsendfile(pVnode->tfd, pVnode->lfd, NULL, pTable->lastBlock.len);
pVnode->lfSize = pCompBlock->offset + pTable->lastBlock.len;
} else {
assert(pVnode->tfd == -1);
}
}
headLen += sizeof(SCompBlock);
pTable->newNumOfBlocks++;
} else {
// read last block into memory
if (vnodeReadLastBlockToMem(pObj, &pTable->lastBlock, data) < 0) goto _over;
pTable->last = 0;
pointsReadLast = pTable->lastBlock.numOfPoints;
query.over = 0;
headInfo.totalStorage -= (pointsReadLast * pObj->bytesPerPoint);
dTrace("vid:%d sid:%d id:%s, points:%d in last block will be merged to new block",
pObj->vnode, pObj->sid, pObj->meterId, pointsReadLast);
}
pTable->changed = 1;
pTable->oldNumOfBlocks--;
}
while (query.over == 0) {
pCompBlock = (SCompBlock *)(hmem + headLen);
assert(dmem - (char *)pCompBlock >= sizeof(SCompBlock));
pointsRead += pointsReadLast;
while (pointsRead < pObj->pointsPerFileBlock) {
query.pointsToRead = pObj->pointsPerFileBlock - pointsRead;
query.pointsOffset = pointsRead;
pointsRead += vnodeQueryFromCache(pObj, &query);
if (query.over) break;
}
if (pointsRead == 0) break;
headInfo.totalStorage += ((pointsRead - pointsReadLast) * pObj->bytesPerPoint);
pCompBlock->last = 1;
if (vnodeWriteBlockToFile(pObj, pCompBlock, data, cdata, pointsRead) < 0) goto _over;
if (pCompBlock->keyLast > pObj->lastKeyOnFile) pObj->lastKeyOnFile = pCompBlock->keyLast;
pTable->last = pCompBlock->last;
// write block info into header buffer
headLen += sizeof(SCompBlock);
pTable->newNumOfBlocks++;
pTable->committedPoints += (pointsRead - pointsReadLast);
dTrace("vid:%d sid:%d id:%s, pointsRead:%d, pointsReadLast:%d lastKey:%" PRId64 ", "
"slot:%d pos:%d newNumOfBlocks:%d headLen:%d",
pObj->vnode, pObj->sid, pObj->meterId, pointsRead, pointsReadLast, pObj->lastKeyOnFile, query.slot, query.pos,
pTable->newNumOfBlocks, headLen);
if (pointsRead < pObj->pointsPerFileBlock || query.keyIsMet) break;
pointsRead = 0;
pointsReadLast = 0;
}
dTrace("vid:%d sid:%d id:%s, %d points are committed, lastKey:%" PRId64 " slot:%d pos:%d newNumOfBlocks:%d",
pObj->vnode, pObj->sid, pObj->meterId, pTable->committedPoints, pObj->lastKeyOnFile, query.slot, query.pos,
pTable->newNumOfBlocks);
if (pTable->committedPoints > 0) {
pTable->commitSlot = query.slot;
pTable->commitPos = query.pos;
}
TSKEY nextKey = 0;
if (pObj->lastKey > pVnode->commitLastKey)
nextKey = pVnode->commitLastKey + 1;
else if (pObj->lastKey > pObj->lastKeyOnFile)
nextKey = pObj->lastKeyOnFile + 1;
pthread_mutex_lock(&(pVnode->vmutex));
if (nextKey < pVnode->firstKey && nextKey > 1) pVnode->firstKey = nextKey;
pthread_mutex_unlock(&(pVnode->vmutex));
}
if (pVnode->lastKey > pVnode->commitLastKey) commitAgain = 1;
dTrace("vid:%d, finish appending the data file", vnode);
// calculate the new compInfoOffset
int compInfoOffset = TSDB_FILE_HEADER_LEN + tmsize;
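// Layout of the new head file: [file header][SCompHeader per table + checksum]
// [per table with blocks: SCompInfo + SCompBlock[] + checksum]; compInfoOffset walks the third
// section while assigning each table its offset.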
for (sid = 0; sid < pCfg->maxSessions; ++sid) {
pObj = (SMeterObj *)(pVnode->meterList[sid]);
pHeader = ((SCompHeader *)tmem) + sid;
if (pObj == NULL) {
pHeader->compInfoOffset = 0;
continue;
}
pTable = meterInfo + sid;
pTable->compInfoOffset = compInfoOffset;
pTable->finalNumOfBlocks = pTable->oldNumOfBlocks + pTable->newNumOfBlocks;
if (pTable->finalNumOfBlocks > 0) {
pHeader->compInfoOffset = pTable->compInfoOffset;
compInfoOffset += sizeof(SCompInfo) + pTable->finalNumOfBlocks * sizeof(SCompBlock) + sizeof(TSCKSUM);
} else {
pHeader->compInfoOffset = 0;
}
dTrace("vid:%d sid:%d id:%s, oldBlocks:%d numOfBlocks:%d compInfoOffset:%d", pObj->vnode, pObj->sid, pObj->meterId,
pTable->oldNumOfBlocks, pTable->finalNumOfBlocks, compInfoOffset);
}
// write the comp header into new file
vnodeUpdateHeadFileHeader(pVnode->nfd, &headInfo);
lseek(pVnode->nfd, TSDB_FILE_HEADER_LEN, SEEK_SET);
taosCalcChecksumAppend(0, (uint8_t *)tmem, tmsize);
if (twrite(pVnode->nfd, tmem, tmsize) <= 0) {
dError("vid:%d sid:%d id:%s, failed to write:%s, error:%s", vnode, sid, pObj->meterId, pVnode->nfn,
strerror(errno));
vnodeRecoverFromPeer(pVnode, pVnode->commitFileId);
goto _over;
}
pOldCompBlocks = (uint8_t *)malloc(sizeof(SCompBlock) * maxOldBlocks);
// write the comp block list in new file
for (sid = 0; sid < pCfg->maxSessions; ++sid) {
pObj = (SMeterObj *)(pVnode->meterList[sid]);
if (pObj == NULL) continue;
pTable = meterInfo + sid;
if (pTable->finalNumOfBlocks <= 0) continue;
compInfo.last = pTable->last;
compInfo.uid = pObj->uid;
compInfo.numOfBlocks = pTable->finalNumOfBlocks;
/* compInfo.compBlockLen = pTable->finalCompBlockLen; */
compInfo.delimiter = TSDB_VNODE_DELIMITER;
taosCalcChecksumAppend(0, (uint8_t *)(&compInfo), sizeof(SCompInfo));
lseek(pVnode->nfd, pTable->compInfoOffset, SEEK_SET);
if (twrite(pVnode->nfd, &compInfo, sizeof(compInfo)) <= 0) {
dError("vid:%d sid:%d id:%s, failed to write:%s, reason:%s", vnode, sid, pObj->meterId, pVnode->nfn,
strerror(errno));
vnodeRecoverFromPeer(pVnode, pVnode->commitFileId);
goto _over;
}
// write the old comp blocks
chksum = 0;
if (pVnode->hfd && pTable->oldNumOfBlocks) {
lseek(pVnode->hfd, pTable->oldCompBlockOffset, SEEK_SET);
if (pTable->changed) {
int compBlockLen = pTable->oldNumOfBlocks * sizeof(SCompBlock);
read(pVnode->hfd, pOldCompBlocks, compBlockLen);
twrite(pVnode->nfd, pOldCompBlocks, compBlockLen);
chksum = taosCalcChecksum(0, pOldCompBlocks, compBlockLen);
} else {
tsendfile(pVnode->nfd, pVnode->hfd, NULL, pTable->oldNumOfBlocks * sizeof(SCompBlock));
read(pVnode->hfd, &chksum, sizeof(TSCKSUM));
}
}
if (pTable->newNumOfBlocks) {
chksum = taosCalcChecksum(chksum, (uint8_t *)(hmem + pTable->tempHeadOffset),
pTable->newNumOfBlocks * sizeof(SCompBlock));
if (twrite(pVnode->nfd, hmem + pTable->tempHeadOffset, pTable->newNumOfBlocks * sizeof(SCompBlock)) <= 0) {
dError("vid:%d sid:%d id:%s, failed to write:%s, reason:%s", vnode, sid, pObj->meterId, pVnode->nfn,
strerror(errno));
vnodeRecoverFromPeer(pVnode, pVnode->commitFileId);
goto _over;
}
}
twrite(pVnode->nfd, &chksum, sizeof(TSCKSUM));
}
tfree(pOldCompBlocks);
dTrace("vid:%d, finish writing the new header file:%s", vnode, pVnode->nfn);
vnodeCloseCommitFiles(pVnode);
for (sid = ssid; sid <= esid; ++sid) {
pObj = (SMeterObj *)(pVnode->meterList[sid]);
if (pObj == NULL) continue;
pTable = meterInfo + sid;
if (pTable->finalNumOfBlocks <= 0) continue;
if (pTable->committedPoints > 0) {
vnodeUpdateCommitInfo(pObj, pTable->commitSlot, pTable->commitPos, pTable->commitCount);
}
}
if (commitAgain) {
pVnode->commitFirstKey = pVnode->commitLastKey + 1;
goto _again;
}
vnodeRemoveCommitLog(vnode);
_over:
pVnode->commitInProcess = 0;
vnodeCommitOver(pVnode);
memset(&(vnodeList[vnode].commitThread), 0, sizeof(vnodeList[vnode].commitThread));
tfree(buffer);
tfree(pOldCompBlocks);
vnodeBroadcastStatusToUnsyncedPeer(pVnode);
dPrint("vid:%d, committing is over", vnode);
return pVnode;
}
void *vnodeCommitToFile(void *param) {
SVnodeObj *pVnode = (SVnodeObj *)param;
return vnodeCommitMultiToFile(pVnode, 0, pVnode->cfg.maxSessions - 1);
}
int vnodeGetCompBlockInfo(SMeterObj *pObj, SQuery *pQuery) {
char prefix[TSDB_FILENAME_LEN];
char fileName[TSDB_FILENAME_LEN];
SCompHeader compHeader;
SCompInfo compInfo;
struct stat fstat;
SVnodeObj * pVnode = &vnodeList[pObj->vnode];
char * buffer = NULL;
TSCKSUM chksum;
vnodeFreeFields(pQuery);
tfree(pQuery->pBlock);
pQuery->numOfBlocks = 0;
SVnodeCfg *pCfg = &vnodeList[pObj->vnode].cfg;
if (pQuery->hfd > 0) close(pQuery->hfd);
sprintf(prefix, "%s/vnode%d/db/v%df%d", tsDirectory, pObj->vnode, pObj->vnode, pQuery->fileId);
sprintf(fileName, "%s.head", prefix);
pthread_mutex_lock(&(pVnode->vmutex));
pQuery->hfd = open(fileName, O_RDONLY);
pthread_mutex_unlock(&(pVnode->vmutex));
if (pQuery->hfd < 0) {
dError("vid:%d sid:%d id:%s, failed to open head file:%s, reason:%s", pObj->vnode, pObj->sid, pObj->meterId,
fileName, strerror(errno));
return vnodeRecoverFromPeer(pVnode, pQuery->fileId);
}
int tmsize = sizeof(SCompHeader) * pCfg->maxSessions + sizeof(TSCKSUM);
buffer = (char *)calloc(1, tmsize);
if (buffer == NULL) {
dError("vid:%d sid:%d id:%s, failed to allocate memory to buffer", pObj->vnode, pObj->sid, pObj->meterId);
return -TSDB_CODE_APP_ERROR;
}
lseek(pQuery->hfd, TSDB_FILE_HEADER_LEN, SEEK_SET);
if (read(pQuery->hfd, buffer, tmsize) != tmsize) {
dError("vid:%d sid:%d id:%s, file:%s failed to read comp header, reason:%s", pObj->vnode, pObj->sid, pObj->meterId,
fileName, strerror(errno));
taosLogError("vid:%d sid:%d id:%s, file:%s failed to read comp header", pObj->vnode, pObj->sid, pObj->meterId,
fileName);
tfree(buffer);
return vnodeRecoverFromPeer(pVnode, pQuery->fileId);
}
if (!taosCheckChecksumWhole((uint8_t *)buffer, tmsize)) {
dError("vid:%d sid:%d id:%s, file:%s comp header offset is broken", pObj->vnode, pObj->sid, pObj->meterId,
fileName);
taosLogError("vid:%d sid:%d id:%s, file:%s comp header offset is broken", pObj->vnode, pObj->sid, pObj->meterId,
fileName);
tfree(buffer);
return vnodeRecoverFromPeer(pVnode, pQuery->fileId);
}
compHeader = ((SCompHeader *)buffer)[pObj->sid];
tfree(buffer);
if (compHeader.compInfoOffset == 0) return 0;
lseek(pQuery->hfd, compHeader.compInfoOffset, SEEK_SET);
read(pQuery->hfd, &compInfo, sizeof(SCompInfo));
if (!taosCheckChecksumWhole((uint8_t *)(&compInfo), sizeof(SCompInfo))) {
dError("vid:%d sid:%d id:%s, file:%s compInfo checksum mismatch", pObj->vnode, pObj->sid, pObj->meterId, fileName);
taosLogError("vid:%d sid:%d id:%s, file:%s compInfo checksum mismatch", pObj->vnode, pObj->sid, pObj->meterId,
fileName);
return vnodeRecoverFromPeer(pVnode, pQuery->fileId);
}
if (compInfo.numOfBlocks <= 0) return 0;
if (compInfo.uid != pObj->uid) return 0;
pQuery->numOfBlocks = compInfo.numOfBlocks;
pQuery->pBlock = (SCompBlock *)calloc(1, (sizeof(SCompBlock) + sizeof(SField *)) * compInfo.numOfBlocks);
pQuery->pFields = (SField **)((char *)pQuery->pBlock + sizeof(SCompBlock) * compInfo.numOfBlocks);
/* char *pBlock = (char *)pQuery->pBlockFields +
* sizeof(SCompBlockFields)*compInfo.numOfBlocks; */
read(pQuery->hfd, pQuery->pBlock, compInfo.numOfBlocks * sizeof(SCompBlock));
read(pQuery->hfd, &chksum, sizeof(TSCKSUM));
if (chksum != taosCalcChecksum(0, (uint8_t *)(pQuery->pBlock), compInfo.numOfBlocks * sizeof(SCompBlock))) {
dError("vid:%d sid:%d id:%s, head file comp block broken, fileId: %d", pObj->vnode, pObj->sid, pObj->meterId,
pQuery->fileId);
taosLogError("vid:%d sid:%d id:%s, head file comp block broken, fileId: %d", pObj->vnode, pObj->sid, pObj->meterId,
pQuery->fileId);
return vnodeRecoverFromPeer(pVnode, pQuery->fileId);
}
close(pQuery->hfd);
pQuery->hfd = -1;
sprintf(fileName, "%s.data", prefix);
if (stat(fileName, &fstat) < 0) {
dError("vid:%d sid:%d id:%s, data file:%s not there!", pObj->vnode, pObj->sid, pObj->meterId, fileName);
return vnodeRecoverFromPeer(pVnode, pQuery->fileId);
}
if (pQuery->dfd > 0) close(pQuery->dfd);
pQuery->dfd = open(fileName, O_RDONLY);
if (pQuery->dfd < 0) {
dError("vid:%d sid:%d id:%s, failed to open data file:%s, reason:%s", pObj->vnode, pObj->sid, pObj->meterId,
fileName, strerror(errno));
return vnodeRecoverFromPeer(pVnode, pQuery->fileId);
}
sprintf(fileName, "%s.last", prefix);
if (stat(fileName, &fstat) < 0) {
dError("vid:%d sid:%d id:%s, last file:%s not there!", pObj->vnode, pObj->sid, pObj->meterId, fileName);
return vnodeRecoverFromPeer(pVnode, pQuery->fileId);
}
if (pQuery->lfd > 0) close(pQuery->lfd);
pQuery->lfd = open(fileName, O_RDONLY);
if (pQuery->lfd < 0) {
dError("vid:%d sid:%d id:%s, failed to open last file:%s, reason:%s", pObj->vnode, pObj->sid, pObj->meterId,
fileName, strerror(errno));
return vnodeRecoverFromPeer(pVnode, pQuery->fileId);
}
return pQuery->numOfBlocks;
}
int vnodeReadColumnToMem(int fd, SCompBlock *pBlock, SField **fields, int col, char *data, int dataSize,
char *temp, char *buffer, int bufferSize) {
int len = 0, size = 0;
SField *tfields = NULL;
TSCKSUM chksum = 0;
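// Each file block starts with an SField array (one entry per column) plus a checksum; every
// SField records the column's type, on-disk offset and length within the block.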
if (*fields == NULL) {
size = sizeof(SField) * (pBlock->numOfCols) + sizeof(TSCKSUM);
*fields = (SField *)calloc(1, size);
lseek(fd, pBlock->offset, SEEK_SET);
read(fd, *fields, size);
if (!taosCheckChecksumWhole((uint8_t *)(*fields), size)) {
dError("SField checksum error, col: %d", col);
taosLogError("SField checksum error, col: %d", col);
return -1;
}
}
tfields = *fields;
/* If data is NULL, only the SField content is needed, so skip reading the data part. */
if (data == NULL) return 0;
lseek(fd, pBlock->offset + tfields[col].offset, SEEK_SET);
if (pBlock->algorithm) {
len = read(fd, temp, tfields[col].len);
read(fd, &chksum, sizeof(TSCKSUM));
if (chksum != taosCalcChecksum(0, (uint8_t *)temp, tfields[col].len)) {
dError("data column checksum error, col: %d", col);
taosLogError("data column checksum error, col: %d", col);
return -1;
}
(*pDecompFunc[tfields[col].type])(temp, tfields[col].len, pBlock->numOfPoints, data, dataSize,
pBlock->algorithm, buffer, bufferSize);
} else {
len = read(fd, data, tfields[col].len);
read(fd, &chksum, sizeof(TSCKSUM));
if (chksum != taosCalcChecksum(0, (uint8_t *)data, tfields[col].len)) {
dError("data column checksum error, col: %d", col);
taosLogError("data column checksum error, col: %d", col);
return -1;
}
}
if (len <= 0) {
dError("failed to read col:%d, offset:%d, reason:%s", col, (int32_t)(tfields[col].offset), strerror(errno));
return -1;
}
return 0;
}
int vnodeReadCompBlockToMem(SMeterObj *pObj, SQuery *pQuery, SData *sdata[]) {
char * temp = NULL;
int i = 0, col = 0, code = 0;
SCompBlock *pBlock = NULL;
SField ** pFields = NULL;
char * buffer = NULL;
int bufferSize = 0;
int dfd = pQuery->dfd;
tfree(pQuery->pFields[pQuery->slot]);
pBlock = pQuery->pBlock + pQuery->slot;
pFields = pQuery->pFields + pQuery->slot;
temp = malloc(pObj->bytesPerPoint * (pBlock->numOfPoints + 1));
if (pBlock->last) dfd = pQuery->lfd;
if (pBlock->algorithm == TWO_STAGE_COMP) {
bufferSize = pObj->maxBytes * pBlock->numOfPoints + EXTRA_BYTES;
buffer = (char *)calloc(1, bufferSize);
}
if (pQuery->colList[0].colIdx != PRIMARYKEY_TIMESTAMP_COL_INDEX) {
// load the timestamp column first in all cases.
code = vnodeReadColumnToMem(dfd, pBlock, pFields, PRIMARYKEY_TIMESTAMP_COL_INDEX,
pQuery->tsData->data + pQuery->pointsOffset * TSDB_KEYSIZE,
TSDB_KEYSIZE*pBlock->numOfPoints, temp, buffer, bufferSize);
col = 1;
} else {
// The timestamp column is retrieved as part of the query columns, so only read the SField metadata for this block here
code = vnodeReadColumnToMem(dfd, pBlock, pFields, 0, NULL, 0, NULL, buffer, bufferSize);
}
if (code < 0) goto _over;
while (col < pBlock->numOfCols && i < pQuery->numOfCols) {
SColumnInfo *pColumnInfo = &pQuery->colList[i].data;
if ((*pFields)[col].colId < pColumnInfo->colId) {
++col;
} else if ((*pFields)[col].colId == pColumnInfo->colId) {
code = vnodeReadColumnToMem(dfd, pBlock, pFields, col, sdata[i]->data, pColumnInfo->bytes*pBlock->numOfPoints, temp, buffer, bufferSize);
if (code < 0) goto _over;
++i;
++col;
} else {
/*
* pQuery->colList[i].data.colId < (*pFields)[col].colId: the requested column does not exist
* in the current block, so fill the output with NULL values
*/
char * output = sdata[i]->data;
int32_t bytes = pQuery->colList[i].data.bytes;
int32_t type = pQuery->colList[i].data.type;
setNullN(output, type, bytes, pBlock->numOfPoints);
++i;
}
}
if (col >= pBlock->numOfCols && i < pQuery->numOfCols) {
// remain columns need to set null value
while (i < pQuery->numOfCols) {
char * output = sdata[i]->data;
int32_t bytes = pQuery->colList[i].data.bytes;
int32_t type = pQuery->colList[i].data.type;
setNullN(output, type, bytes, pBlock->numOfPoints);
++i;
}
}
_over:
tfree(buffer);
tfree(temp);
if (code < 0) code = vnodeRecoverFromPeer(vnodeList + pObj->vnode, pQuery->fileId);
return code;
}
int vnodeReadLastBlockToMem(SMeterObj *pObj, SCompBlock *pBlock, SData *sdata[]) {
char * temp = NULL;
int col = 0, code = 0;
SField *pFields = NULL;
char * buffer = NULL;
int bufferSize = 0;
SVnodeObj *pVnode = vnodeList + pObj->vnode;
temp = malloc(pObj->bytesPerPoint * (pBlock->numOfPoints + 1));
if (pBlock->algorithm == TWO_STAGE_COMP) {
bufferSize = pObj->maxBytes*pBlock->numOfPoints+EXTRA_BYTES;
buffer = (char *)calloc(1, bufferSize);
}
for (col = 0; col < pBlock->numOfCols; ++col) {
code = vnodeReadColumnToMem(pVnode->lfd, pBlock, &pFields, col, sdata[col]->data,
pObj->pointsPerFileBlock*pObj->schema[col].bytes+EXTRA_BYTES, temp, buffer, bufferSize);
if (code < 0) break;
sdata[col]->len = pObj->schema[col].bytes * pBlock->numOfPoints;
}
tfree(buffer);
tfree(temp);
tfree(pFields);
if (code < 0) code = vnodeRecoverFromPeer(pVnode, pVnode->fileId);
return code;
}
int vnodeWriteBlockToFile(SMeterObj *pObj, SCompBlock *pCompBlock, SData *data[], SData *cdata[], int points) {
SVnodeObj *pVnode = &vnodeList[pObj->vnode];
SVnodeCfg *pCfg = &pVnode->cfg;
int wlen = 0;
SField * fields = NULL;
int size = sizeof(SField) * pObj->numOfColumns + sizeof(TSCKSUM);
int32_t offset = size;
char * buffer = NULL;
int bufferSize = 0;
int dfd = pVnode->dfd;
if (pCompBlock->last && (points < pObj->pointsPerFileBlock * tsFileBlockMinPercent)) {
dTrace("vid:%d sid:%d id:%s, points:%d are written to last block, block stime: %" PRId64 ", block etime: %" PRId64,
pObj->vnode, pObj->sid, pObj->meterId, points, *((TSKEY *)(data[0]->data)),
*((TSKEY * )(data[0]->data + (points - 1) * pObj->schema[0].bytes)));
pCompBlock->last = 1;
dfd = pVnode->tfd > 0 ? pVnode->tfd : pVnode->lfd;
} else {
pCompBlock->last = 0;
}
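// Blocks holding fewer than pointsPerFileBlock * tsFileBlockMinPercent rows are flagged as "last"
// and written to the .last (or temporary .l) file so they can be merged with new data on the next
// commit instead of leaving many small blocks in the .data file.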
pCompBlock->offset = lseek(dfd, 0, SEEK_END);
pCompBlock->len = 0;
fields = (SField *)calloc(1, size);
if (fields == NULL) return -1;
if (pCfg->compression == TWO_STAGE_COMP){
bufferSize = pObj->maxBytes * points + EXTRA_BYTES;
buffer = (char *)malloc(bufferSize);
}
for (int i = 0; i < pObj->numOfColumns; ++i) {
fields[i].colId = pObj->schema[i].colId;
fields[i].type = pObj->schema[i].type;
fields[i].bytes = pObj->schema[i].bytes;
fields[i].offset = offset;
// assert(data[i]->len == points*pObj->schema[i].bytes);
if (pCfg->compression) {
cdata[i]->len = (*pCompFunc[(uint8_t)pObj->schema[i].type])(data[i]->data, points * pObj->schema[i].bytes, points,
cdata[i]->data, pObj->schema[i].bytes*pObj->pointsPerFileBlock+EXTRA_BYTES,
pCfg->compression, buffer, bufferSize);
fields[i].len = cdata[i]->len;
taosCalcChecksumAppend(0, (uint8_t *)(cdata[i]->data), cdata[i]->len + sizeof(TSCKSUM));
offset += (cdata[i]->len + sizeof(TSCKSUM));
} else {
data[i]->len = pObj->schema[i].bytes * points;
fields[i].len = data[i]->len;
taosCalcChecksumAppend(0, (uint8_t *)(data[i]->data), data[i]->len + sizeof(TSCKSUM));
offset += (data[i]->len + sizeof(TSCKSUM));
}
getStatistics(data[0]->data, data[i]->data, pObj->schema[i].bytes, points, pObj->schema[i].type, &fields[i].min,
&fields[i].max, &fields[i].sum, &fields[i].minIndex, &fields[i].maxIndex, &fields[i].numOfNullPoints);
}
tfree(buffer);
// Write SField part
taosCalcChecksumAppend(0, (uint8_t *)fields, size);
wlen = twrite(dfd, fields, size);
if (wlen <= 0) {
tfree(fields);
dError("vid:%d sid:%d id:%s, failed to write block, wlen:%d reason:%s", pObj->vnode, pObj->sid, pObj->meterId, wlen,
strerror(errno));
#ifdef CLUSTER
return vnodeRecoverFromPeer(pVnode, pVnode->commitFileId);
#else
return -1;
#endif
}
pVnode->vnodeStatistic.compStorage += wlen;
pVnode->dfSize += wlen;
pCompBlock->len += wlen;
tfree(fields);
// Write data part
for (int i = 0; i < pObj->numOfColumns; ++i) {
if (pCfg->compression) {
wlen = twrite(dfd, cdata[i]->data, cdata[i]->len + sizeof(TSCKSUM));
} else {
wlen = twrite(dfd, data[i]->data, data[i]->len + sizeof(TSCKSUM));
}
if (wlen <= 0) {
dError("vid:%d sid:%d id:%s, failed to write block, wlen:%d points:%d reason:%s",
pObj->vnode, pObj->sid, pObj->meterId, wlen, points, strerror(errno));
return vnodeRecoverFromPeer(pVnode, pVnode->commitFileId);
}
pVnode->vnodeStatistic.compStorage += wlen;
pVnode->dfSize += wlen;
pCompBlock->len += wlen;
}
dTrace("vid:%d, vnode compStorage size is: %" PRId64, pObj->vnode, pVnode->vnodeStatistic.compStorage);
pCompBlock->algorithm = pCfg->compression;
pCompBlock->numOfPoints = points;
pCompBlock->numOfCols = pObj->numOfColumns;
pCompBlock->keyFirst = *((TSKEY *)(data[0]->data)); // first/last keys come straight from the timestamp column
pCompBlock->keyLast = *((TSKEY *)(data[0]->data + (points - 1) * pObj->schema[0].bytes));
pCompBlock->sversion = pObj->sversion;
assert(pCompBlock->keyFirst <= pCompBlock->keyLast);
return 0;
}
static int forwardInFile(SQuery *pQuery, int32_t midSlot, int32_t step, SVnodeObj *pVnode, SMeterObj *pObj);
int vnodeSearchPointInFile(SMeterObj *pObj, SQuery *pQuery) {
TSKEY latest, oldest;
int ret = 0;
int64_t delta = 0;
int firstSlot, lastSlot, midSlot;
int numOfBlocks;
char * temp = NULL, *data = NULL;
SCompBlock *pBlock = NULL;
SVnodeObj * pVnode = &vnodeList[pObj->vnode];
int step;
char * buffer = NULL;
int bufferSize = 0;
int dfd;
// if file is broken, pQuery->slot = -2; if not found, pQuery->slot = -1;
pQuery->slot = -1;
pQuery->pos = -1;
if (pVnode->numOfFiles <= 0) return 0;
SVnodeCfg *pCfg = &pVnode->cfg;
delta = (int64_t)pCfg->daysPerFile * tsMsPerDay[(uint8_t)pVnode->cfg.precision];
latest = pObj->lastKeyOnFile;
oldest = (pVnode->fileId - pVnode->numOfFiles + 1) * delta;
if (latest < oldest) return 0;
if (!QUERY_IS_ASC_QUERY(pQuery)) {
if (pQuery->skey < oldest) return 0;
if (pQuery->ekey > latest) return 0;
if (pQuery->skey > latest) pQuery->skey = latest;
} else {
if (pQuery->skey > latest) return 0;
if (pQuery->ekey < oldest) return 0;
if (pQuery->skey < oldest) pQuery->skey = oldest;
}
dTrace("vid:%d sid:%d id:%s, skey:%" PRId64 " ekey:%" PRId64 " oldest:%" PRId64 " latest:%" PRId64 " fileId:%d numOfFiles:%d",
pObj->vnode, pObj->sid, pObj->meterId, pQuery->skey, pQuery->ekey, oldest, latest, pVnode->fileId,
pVnode->numOfFiles);
step = QUERY_IS_ASC_QUERY(pQuery) ? 1 : -1;
pQuery->fileId = pQuery->skey / delta; // starting fileId
pQuery->fileId -= step; // back up one step so the while loop below starts at the correct file
bufferSize = pCfg->rowsInFileBlock*sizeof(TSKEY)+EXTRA_BYTES;
buffer = (char *)calloc(1, bufferSize);
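// For each candidate file, binary search the SCompBlock list for the slot whose key range may
// contain skey, then locate the exact row inside that block with vnodeSearchKeyFunc.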
while (1) {
pQuery->fileId += step;
if ((pQuery->fileId > pVnode->fileId) || (pQuery->fileId < pVnode->fileId - pVnode->numOfFiles + 1)) {
tfree(buffer);
return 0;
}
ret = vnodeGetCompBlockInfo(pObj, pQuery);
if (ret == 0) continue;
if (ret < 0) break; // file broken
pBlock = pQuery->pBlock;
firstSlot = 0;
lastSlot = pQuery->numOfBlocks - 1;
//numOfBlocks = pQuery->numOfBlocks;
if (QUERY_IS_ASC_QUERY(pQuery) && pBlock[lastSlot].keyLast < pQuery->skey) continue;
if (!QUERY_IS_ASC_QUERY(pQuery) && pBlock[firstSlot].keyFirst > pQuery->skey) continue;
while (1) {
numOfBlocks = lastSlot - firstSlot + 1;
midSlot = (firstSlot + (numOfBlocks >> 1));
if (numOfBlocks == 1) break;
if (pQuery->skey > pBlock[midSlot].keyLast) {
if (numOfBlocks == 2) break;
if (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->skey < pBlock[midSlot + 1].keyFirst)) break;
firstSlot = midSlot + 1;
} else if (pQuery->skey < pBlock[midSlot].keyFirst) {
if (QUERY_IS_ASC_QUERY(pQuery) && (pQuery->skey > pBlock[midSlot - 1].keyLast)) break;
lastSlot = midSlot - 1;
} else {
break; // got the slot
}
}
pQuery->slot = midSlot;
if (!QUERY_IS_ASC_QUERY(pQuery)) {
if (pQuery->skey < pBlock[midSlot].keyFirst) break;
if (pQuery->ekey > pBlock[midSlot].keyLast) {
pQuery->slot = midSlot + 1;
break;
}
} else {
if (pQuery->skey > pBlock[midSlot].keyLast) {
pQuery->slot = midSlot + 1;
break;
}
if (pQuery->ekey < pBlock[midSlot].keyFirst) break;
}
temp = malloc(pObj->pointsPerFileBlock * TSDB_KEYSIZE + EXTRA_BYTES); // only first column
data = malloc(pObj->pointsPerFileBlock * TSDB_KEYSIZE + EXTRA_BYTES); // only first column
dfd = pBlock[midSlot].last ? pQuery->lfd : pQuery->dfd;
ret = vnodeReadColumnToMem(dfd, pBlock + midSlot, pQuery->pFields + midSlot, 0, data,
pObj->pointsPerFileBlock*TSDB_KEYSIZE+EXTRA_BYTES,
temp, buffer, bufferSize);
if (ret < 0) {
ret = vnodeRecoverFromPeer(pVnode, pQuery->fileId);
break;
} // file broken
pQuery->pos = (*vnodeSearchKeyFunc[pObj->searchAlgorithm])(data, pBlock[midSlot].numOfPoints, pQuery->skey,
pQuery->order.order);
pQuery->key = *((TSKEY *)(data + pObj->schema[0].bytes * pQuery->pos));
ret = vnodeForwardStartPosition(pQuery, pBlock, midSlot, pVnode, pObj);
break;
}
tfree(buffer);
tfree(temp);
tfree(data);
return ret;
}
int vnodeForwardStartPosition(SQuery *pQuery, SCompBlock *pBlock, int32_t slotIdx, SVnodeObj *pVnode, SMeterObj *pObj) {
int step = QUERY_IS_ASC_QUERY(pQuery) ? 1 : -1;
if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols == 0) {
int maxReads = QUERY_IS_ASC_QUERY(pQuery) ? pBlock->numOfPoints - pQuery->pos : pQuery->pos + 1;
if (pQuery->limit.offset < maxReads) { // start position in current block
if (QUERY_IS_ASC_QUERY(pQuery)) {
pQuery->pos += pQuery->limit.offset;
} else {
pQuery->pos -= pQuery->limit.offset;
}
pQuery->limit.offset = 0;
} else {
pQuery->limit.offset -= maxReads;
slotIdx += step;
return forwardInFile(pQuery, slotIdx, step, pVnode, pObj);
}
}
return pQuery->numOfBlocks;
}
int forwardInFile(SQuery *pQuery, int32_t slotIdx, int32_t step, SVnodeObj *pVnode, SMeterObj *pObj) {
SCompBlock *pBlock = pQuery->pBlock;
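// Skip whole blocks while the remaining limit.offset still covers them; the loop stops at the
// block (and later the row) where the offset finally lands.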
while (slotIdx < pQuery->numOfBlocks && slotIdx >= 0 && pQuery->limit.offset >= pBlock[slotIdx].numOfPoints) {
pQuery->limit.offset -= pBlock[slotIdx].numOfPoints;
slotIdx += step;
}
if (slotIdx < pQuery->numOfBlocks && slotIdx >= 0) {
if (QUERY_IS_ASC_QUERY(pQuery)) {
pQuery->pos = pQuery->limit.offset;
} else {
pQuery->pos = pBlock[slotIdx].numOfPoints - pQuery->limit.offset - 1;
}
pQuery->slot = slotIdx;
pQuery->limit.offset = 0;
return pQuery->numOfBlocks;
} else { // continue in next file, forward pQuery->limit.offset points
int ret = 0;
pQuery->slot = -1;
pQuery->pos = -1;
while (1) {
pQuery->fileId += step;
if ((pQuery->fileId > pVnode->fileId) || (pQuery->fileId < pVnode->fileId - pVnode->numOfFiles + 1)) {
pQuery->lastKey = pObj->lastKeyOnFile;
pQuery->skey = pQuery->lastKey + 1;
return 0;
}
ret = vnodeGetCompBlockInfo(pObj, pQuery);
if (ret == 0) continue;
if (ret > 0) break; // qualified file
}
if (ret > 0) {
int startSlot = QUERY_IS_ASC_QUERY(pQuery) ? 0 : pQuery->numOfBlocks - 1;
return forwardInFile(pQuery, startSlot, step, pVnode, pObj);
} else {
return ret;
}
}
}
static FORCE_INLINE TSKEY vnodeGetTSInDataBlock(SQuery *pQuery, int32_t pos, int32_t factor) {
return *(TSKEY *)(pQuery->tsData->data + (pQuery->pointsOffset * factor + pos) * TSDB_KEYSIZE);
}
int vnodeQueryFromFile(SMeterObj *pObj, SQuery *pQuery) {
int numOfReads = 0;
int lastPos = -1, startPos;
int col, step, code = 0;
char * pRead, *pData;
SData * sdata[TSDB_MAX_COLUMNS];
SCompBlock *pBlock = NULL;
SVnodeObj * pVnode = &vnodeList[pObj->vnode];
pQuery->pointsRead = 0;
int keyLen = TSDB_KEYSIZE;
if (pQuery->over) return 0;
if (pQuery->slot < 0) // it means a new query, we need to find the point first
code = vnodeSearchPointInFile(pObj, pQuery);
if (code < 0 || pQuery->slot < 0 || pQuery->pos == -1) {
pQuery->over = 1;
return code;
}
step = QUERY_IS_ASC_QUERY(pQuery) ? -1 : 1;
pBlock = pQuery->pBlock + pQuery->slot;
if (pQuery->pos == FILE_QUERY_NEW_BLOCK) {
if (!QUERY_IS_ASC_QUERY(pQuery)) {
if (pQuery->ekey > pBlock->keyLast) pQuery->over = 1;
if (pQuery->skey < pBlock->keyFirst) pQuery->over = 1;
} else {
if (pQuery->ekey < pBlock->keyFirst) pQuery->over = 1;
if (pQuery->skey > pBlock->keyLast) pQuery->over = 1;
}
pQuery->pos = QUERY_IS_ASC_QUERY(pQuery) ? 0 : pBlock->numOfPoints - 1;
}
if (pQuery->over) return 0;
// Make sure the start position of each buffer is aligned to 4 bytes on 32-bit ARM systems.
for(col = 0; col < pQuery->numOfCols; ++col) {
sdata[col] = calloc(1, sizeof(SData) + pBlock->numOfPoints * pQuery->colList[col].data.bytes + EXTRA_BYTES);
}
/*
* timestamp column is fetched in any cases. Therefore, if the query does not fetch primary column,
* we allocate tsData buffer with twice size of the other ordinary pQuery->sdata.
* Otherwise, the query function may over-write buffer area while retrieve function has not packed the results into
* message to send to client yet.
*
* So the startPositionFactor is needed to denote which half part is used to store the result, and which
* part is available for keep data during query process.
*
* Note: the startPositionFactor must be used in conjunction with pQuery->pointsOffset
*/
int32_t startPositionFactor = 1;
if (pQuery->colList[0].colIdx == PRIMARYKEY_TIMESTAMP_COL_INDEX) {
pQuery->tsData = sdata[0];
startPositionFactor = 0;
}
code = vnodeReadCompBlockToMem(pObj, pQuery, sdata);
if (code < 0) {
dError("vid:%d sid:%d id:%s, failed to read block:%d numOfPoints:%d", pObj->vnode, pObj->sid, pObj->meterId,
pQuery->slot, pBlock->numOfPoints);
goto _next;
}
int maxReads = QUERY_IS_ASC_QUERY(pQuery) ? pBlock->numOfPoints - pQuery->pos : pQuery->pos + 1;
TSKEY startKey = vnodeGetTSInDataBlock(pQuery, 0, startPositionFactor);
TSKEY endKey = vnodeGetTSInDataBlock(pQuery, pBlock->numOfPoints - 1, startPositionFactor);
if (QUERY_IS_ASC_QUERY(pQuery)) {
if (endKey < pQuery->ekey) {
numOfReads = maxReads;
} else {
lastPos = (*vnodeSearchKeyFunc[pObj->searchAlgorithm])(
pQuery->tsData->data + keyLen * (pQuery->pos + pQuery->pointsOffset * startPositionFactor), maxReads,
pQuery->ekey, TSQL_SO_DESC);
numOfReads = (lastPos >= 0) ? lastPos + 1 : 0;
}
} else {
if (startKey > pQuery->ekey) {
numOfReads = maxReads;
} else {
lastPos = (*vnodeSearchKeyFunc[pObj->searchAlgorithm])(
pQuery->tsData->data + keyLen * pQuery->pointsOffset * startPositionFactor, maxReads, pQuery->ekey,
TSQL_SO_ASC);
numOfReads = (lastPos >= 0) ? pQuery->pos - lastPos + 1 : 0;
}
}
if (numOfReads > pQuery->pointsToRead - pQuery->pointsRead) {
numOfReads = pQuery->pointsToRead - pQuery->pointsRead;
} else {
if (lastPos >= 0 || numOfReads == 0) {
pQuery->keyIsMet = 1;
pQuery->over = 1;
}
}
startPos = QUERY_IS_ASC_QUERY(pQuery) ? pQuery->pos : pQuery->pos - numOfReads + 1;
int32_t numOfQualifiedPoints = 0;
int32_t numOfActualRead = numOfReads;
// copy data to result buffer
if (pQuery->numOfFilterCols == 0) {
// no filter condition on ordinary columns
for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) {
int16_t colBufferIndex = pQuery->pSelectExpr[i].pBase.colInfo.colIdxInBuf;
int32_t bytes = GET_COLUMN_BYTES(pQuery, i);
pData = pQuery->sdata[i]->data + pQuery->pointsOffset * bytes;
pRead = sdata[colBufferIndex]->data + startPos * bytes;
if (QUERY_IS_ASC_QUERY(pQuery)) {
memcpy(pData, pRead, numOfReads * bytes);
} else { //reversed copy to output buffer
for(int32_t j = 0; j < numOfReads; ++j) {
memcpy(pData + bytes * j, pRead + (numOfReads - 1 - j) * bytes, bytes);
}
}
}
numOfQualifiedPoints = numOfReads;
} else {
// check each data one by one set the input column data
for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
struct SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
pFilterInfo->pData = sdata[pFilterInfo->info.colIdxInBuf]->data;
}
int32_t *ids = calloc(1, numOfReads * sizeof(int32_t));
numOfActualRead = 0;
if (QUERY_IS_ASC_QUERY(pQuery)) {
for (int32_t j = startPos; j < pBlock->numOfPoints; j -= step) {
TSKEY key = vnodeGetTSInDataBlock(pQuery, j, startPositionFactor);
if (key < startKey || key > endKey) {
dError("vid:%d sid:%d id:%s, timestamp in file block disordered. slot:%d, pos:%d, ts:%" PRId64 ", block "
"range:%" PRId64 "-%" PRId64, pObj->vnode, pObj->sid, pObj->meterId, pQuery->slot, j, key, startKey, endKey);
tfree(ids);
return -TSDB_CODE_FILE_BLOCK_TS_DISORDERED;
}
// out of query range, quit
if (key > pQuery->ekey) {
break;
}
if (!vnodeFilterData(pQuery, &numOfActualRead, j)) {
continue;
}
ids[numOfQualifiedPoints] = j;
if (++numOfQualifiedPoints == numOfReads) { // qualified data are enough
break;
}
}
} else {
for (int32_t j = pQuery->pos; j >= 0; --j) {
TSKEY key = vnodeGetTSInDataBlock(pQuery, j, startPositionFactor);
if (key < startKey || key > endKey) {
dError("vid:%d sid:%d id:%s, timestamp in file block disordered. slot:%d, pos:%d, ts:%" PRId64 ", block "
"range:%" PRId64 "-%" PRId64, pObj->vnode, pObj->sid, pObj->meterId, pQuery->slot, j, key, startKey, endKey);
tfree(ids);
return -TSDB_CODE_FILE_BLOCK_TS_DISORDERED;
}
// out of query range, quit
if (key < pQuery->ekey) {
break;
}
if (!vnodeFilterData(pQuery, &numOfActualRead, j)) {
continue;
}
ids[numOfQualifiedPoints] = j;
if (++numOfQualifiedPoints == numOfReads) { // qualified data are enough
break;
}
}
}
// int32_t start = QUERY_IS_ASC_QUERY(pQuery) ? 0 : numOfReads - numOfQualifiedPoints;
for (int32_t j = 0; j < numOfQualifiedPoints; ++j) {
for (int32_t col = 0; col < pQuery->numOfOutputCols; ++col) {
int16_t colIndexInBuffer = pQuery->pSelectExpr[col].pBase.colInfo.colIdxInBuf;
int32_t bytes = GET_COLUMN_BYTES(pQuery, col);
pData = pQuery->sdata[col]->data + (pQuery->pointsOffset + j) * bytes;
pRead = sdata[colIndexInBuffer]->data + ids[j/* + start*/] * bytes;
memcpy(pData, pRead, bytes);
}
}
tfree(ids);
assert(numOfQualifiedPoints <= numOfReads);
}
// Note: numOfQualifiedPoints may be 0, since no data in this block are qualified
assert(pQuery->pointsRead == 0);
pQuery->pointsRead += numOfQualifiedPoints;
for (col = 0; col < pQuery->numOfOutputCols; ++col) {
int16_t bytes = GET_COLUMN_BYTES(pQuery, col);
pQuery->sdata[col]->len = bytes * (pQuery->pointsOffset + pQuery->pointsRead);
}
pQuery->pos -= numOfActualRead * step;
// update the lastkey/skey
int32_t lastAccessPos = pQuery->pos + step;
pQuery->lastKey = vnodeGetTSInDataBlock(pQuery, lastAccessPos, startPositionFactor);
pQuery->skey = pQuery->lastKey - step;
_next:
if ((pQuery->pos < 0 || pQuery->pos >= pBlock->numOfPoints || numOfReads == 0) && (pQuery->over == 0)) {
pQuery->slot = pQuery->slot - step;
pQuery->pos = FILE_QUERY_NEW_BLOCK;
}
if ((pQuery->slot < 0 || pQuery->slot >= pQuery->numOfBlocks) && (pQuery->over == 0)) {
int ret;
while (1) {
ret = -1;
pQuery->fileId -= step; // jump to next file
if (QUERY_IS_ASC_QUERY(pQuery)) {
if (pQuery->fileId > pVnode->fileId) {
// TODO:
// check whether the file has been updated; if so, reopen it and check whether this
// meter has been updated; if the meter was updated, read in the new block info
break;
}
} else {
if ((pVnode->fileId - pQuery->fileId + 1) > pVnode->numOfFiles) break;
}
ret = vnodeGetCompBlockInfo(pObj, pQuery);
if (ret > 0) break;
if (ret < 0) code = ret;
}
if (ret <= 0) pQuery->over = 1;
pQuery->slot = QUERY_IS_ASC_QUERY(pQuery) ? 0 : pQuery->numOfBlocks - 1;
}
for(int32_t i = 0; i < pQuery->numOfCols; ++i) {
tfree(sdata[i]);
}
return code;
}
int vnodeUpdateFileMagic(int vnode, int fileId) {
struct stat fstat;
char fileName[256];
SVnodeObj *pVnode = vnodeList + vnode;
uint64_t magic = 0;
vnodeGetHeadDataLname(fileName, NULL, NULL, vnode, fileId);
if (stat(fileName, &fstat) != 0) {
dError("vid:%d, head file:%s is not there", vnode, fileName);
return -1;
}
int size = sizeof(SCompHeader) * pVnode->cfg.maxSessions + sizeof(TSCKSUM) + TSDB_FILE_HEADER_LEN;
if (fstat.st_size < size) {
dError("vid:%d, head file:%s is corrupted", vnode, fileName);
return -1;
}
#ifdef CLUSTER
//if (fstat.st_size == size) return 0;
#else
if (fstat.st_size == size) return 0;
#endif
vnodeGetHeadDataLname(NULL, fileName, NULL, vnode, fileId);
if (stat(fileName, &fstat) == 0) {
magic = fstat.st_size;
} else {
dError("vid:%d, data file:%s is not there", vnode, fileName);
return -1;
}
vnodeGetHeadDataLname(NULL, NULL, fileName, vnode, fileId);
if (stat(fileName, &fstat) == 0) {
magic += fstat.st_size;
}
int slot = fileId % pVnode->maxFiles;
pVnode->fmagic[slot] = magic;
return 0;
}
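/*
 * vnodeInitFile computes how many data files a vnode may keep and verifies each existing
 * file. Illustration (assumed numbers): with daysToKeep = 3650 and daysPerFile = 10,
 * maxFiles = 3650 / 10 + 1 = 366, and fmagic[] holds one slot per possible file plus one spare.
 */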
int vnodeInitFile(int vnode) {
int code = TSDB_CODE_SUCCESS;
SVnodeObj *pVnode = vnodeList + vnode;
pVnode->maxFiles = pVnode->cfg.daysToKeep / pVnode->cfg.daysPerFile + 1;
pVnode->maxFile1 = pVnode->cfg.daysToKeep1 / pVnode->cfg.daysPerFile;
pVnode->maxFile2 = pVnode->cfg.daysToKeep2 / pVnode->cfg.daysPerFile;
pVnode->fmagic = (uint64_t *)calloc(pVnode->maxFiles + 1, sizeof(uint64_t));
int fileId = pVnode->fileId;
/*
 * The number of files actually on disk may exceed the number that should exist (maxFiles)
 */
if (pVnode->numOfFiles > pVnode->maxFiles) {
dError("vid:%d numOfFiles:%d should not larger than maxFiles:%d", vnode, pVnode->numOfFiles, pVnode->maxFiles);
}
int numOfFiles = MIN(pVnode->numOfFiles, pVnode->maxFiles);
for (int i = 0; i < numOfFiles; ++i) {
if (vnodeUpdateFileMagic(vnode, fileId) < 0) {
if (pVnode->cfg.replications > 1) {
pVnode->badFileId = fileId;
}
dError("vid:%d fileId:%d is corrupted", vnode, fileId);
} else {
dTrace("vid:%d fileId:%d is checked", vnode, fileId);
}
fileId--;
}
return code;
}
int vnodeRecoverCompHeader(int vnode, int fileId) {
// TODO: try to recover SCompHeader part
dTrace("starting to recover vnode head file comp header part, vnode: %d fileId: %d", vnode, fileId);
assert(0);
return 0;
}
int vnodeRecoverHeadFile(int vnode, int fileId) {
// TODO: try to recover SCompHeader part
dTrace("starting to recover vnode head file, vnode: %d, fileId: %d", vnode, fileId);
assert(0);
return 0;
}
int vnodeRecoverDataFile(int vnode, int fileId) {
// TODO: try to recover SCompHeader part
dTrace("starting to recover vnode data file, vnode: %d, fileId: %d", vnode, fileId);
assert(0);
return 0;
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "vnode.h"
#include "vnodeFile.h"
char* vnodeGetDiskFromHeadFile(char *headName) { return tsDirectory; }
char* vnodeGetDataDir(int vnode, int fileId) { return dataDir; }
void vnodeAdustVnodeFile(SVnodeObj *pVnode) {
// Retention policy here
int fileId = pVnode->fileId - pVnode->numOfFiles + 1;
int cfile = taosGetTimestamp(pVnode->cfg.precision)/pVnode->cfg.daysPerFile/tsMsPerDay[(uint8_t)pVnode->cfg.precision];
while (fileId <= cfile - pVnode->maxFiles) {
vnodeRemoveFile(pVnode->vnode, fileId);
pVnode->numOfFiles--;
fileId++;
}
}
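/*
 * Layout assumed by the check below (as read by this function): the .head file starts with a
 * TSDB_FILE_HEADER_LEN header, followed by one SCompHeader per session (maxSessions entries)
 * plus a TSCKSUM; each non-zero compInfoOffset points to an SCompInfo followed by its
 * SCompBlock array and another TSCKSUM. A "last" block is only valid as the final block.
 */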
int vnodeCheckNewHeaderFile(int fd, SVnodeObj *pVnode) {
SCompHeader *pHeader = NULL;
SCompBlock *pBlocks = NULL;
int blockSize = 0;
SCompInfo compInfo;
int tmsize = 0;
tmsize = sizeof(SCompHeader) * pVnode->cfg.maxSessions + sizeof(TSCKSUM);
pHeader = (SCompHeader *)malloc(tmsize);
if (pHeader == NULL) return 0;
lseek(fd, TSDB_FILE_HEADER_LEN, SEEK_SET);
if (read(fd, (void *)pHeader, tmsize) != tmsize) {
goto _broken_exit;
}
if (!taosCheckChecksumWhole((uint8_t *)pHeader, tmsize)) {
goto _broken_exit;
}
for (int sid = 0; sid < pVnode->cfg.maxSessions; sid++) {
if (pVnode->meterList == NULL) goto _correct_exit;
if (pVnode->meterList[sid] == NULL || pHeader[sid].compInfoOffset == 0) continue;
lseek(fd, pHeader[sid].compInfoOffset, SEEK_SET);
if (read(fd, (void *)(&compInfo), sizeof(SCompInfo)) != sizeof(SCompInfo)) {
goto _broken_exit;
}
if (!taosCheckChecksumWhole((uint8_t *)(&compInfo), sizeof(SCompInfo))) {
goto _broken_exit;
}
if (compInfo.uid != ((SMeterObj *)pVnode->meterList[sid])->uid) continue;
int expectedSize = sizeof(SCompBlock) * compInfo.numOfBlocks + sizeof(TSCKSUM);
if (blockSize < expectedSize) {
pBlocks = (SCompBlock *)realloc(pBlocks, expectedSize);
if (pBlocks == NULL) {
tfree(pHeader);
return 0;
}
blockSize = expectedSize;
}
if (read(fd, (void *)pBlocks, expectedSize) != expectedSize) {
dError("failed to read block part");
goto _broken_exit;
}
if (!taosCheckChecksumWhole((uint8_t *)pBlocks, expectedSize)) {
dError("block part is broken");
goto _broken_exit;
}
for (int i = 0; i < compInfo.numOfBlocks; i++) {
if (pBlocks[i].last && i != compInfo.numOfBlocks-1) {
dError("last block in middle, block:%d", i);
goto _broken_exit;
}
}
}
_correct_exit:
dPrint("vid: %d new header file %s is correct", pVnode->vnode, pVnode->nfn);
tfree(pBlocks);
tfree(pHeader);
return 0;
_broken_exit:
dError("vid: %d new header file %s is broken", pVnode->vnode, pVnode->nfn);
tfree(pBlocks);
tfree(pHeader);
return -1;
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "vnode.h"
int vnodeCheckHeaderFile(int fd, int dfd, SVnodeCfg cfg, int mode) {
SCompHeader *pHeaders = NULL;
SVnodeCfg *pCfg = &cfg;
SCompInfo compInfo;
SCompBlock *pBlocks = NULL;
int blockSize = 0;
SField *pFields = NULL;
char *pBuf = NULL;
int size = 0;
int ret = 0;
if (fd < 0 || dfd < 0) return -1;
lseek(fd, TSDB_FILE_HEADER_LEN, SEEK_SET);
size = pCfg->maxSessions*sizeof(SCompHeader)+sizeof(TSCKSUM);
pHeaders = calloc(1, size);
if (pHeaders == NULL) {
return -1;
}
if (read(fd, pHeaders, size) != size || !taosCheckChecksumWhole((uint8_t *)pHeaders, size)) {
tfree(pHeaders);
return -1;
}
for (int i = 0; i < pCfg->maxSessions; i++) {
if (pHeaders[i].compInfoOffset == 0) continue;
if (pHeaders[i].compInfoOffset < 0) {
// TODO : report error here
ret = -1;
continue;
}
lseek(fd, pHeaders[i].compInfoOffset, SEEK_SET);
read(fd, &compInfo, sizeof(SCompInfo));
if (!taosCheckChecksumWhole((uint8_t *)&compInfo, sizeof(SCompInfo))) {
// TODO : report error
ret = -1;
continue;
}
int tsize = sizeof(SCompBlock) * compInfo.numOfBlocks + sizeof(TSCKSUM);
if (tsize > blockSize) {
if (pBlocks == NULL) {
pBlocks = calloc(1, tsize);
} else {
pBlocks = realloc(pBlocks, tsize);
}
blockSize = tsize;
}
read(fd, pBlocks, tsize);
if (!taosCheckChecksumWhole((uint8_t *)pBlocks, tsize)) {
// TODO: Report error
ret = -1;
continue;
}
TSKEY keyLast = 0;
for (int j = 0; j < compInfo.numOfBlocks; j++) {
SCompBlock *pBlock = pBlocks + j;
if (pBlock->last != 0 && j < compInfo.numOfBlocks-1) {
// TODO: report error
ret = -1;
break;
}
if (pBlock->offset < TSDB_FILE_HEADER_LEN) {
// TODO : report error here
ret = -1;
break;
}
if (pBlock->keyLast < pBlock->keyFirst) {
// TODO : report error
ret = -1;
break;
}
if (pBlock->keyFirst <= keyLast) {
// TODO : report error
ret = -1;
break;
}
keyLast = pBlock->keyLast;
// Check block in data
lseek(dfd, pBlock->offset, SEEK_SET);
tsize = sizeof(SField) * pBlock->numOfCols + sizeof(TSCKSUM);
pFields = realloc(pFields, tsize);
read(dfd, pFields, tsize);
if (!taosCheckChecksumWhole((uint8_t*)pFields, tsize)) {
// TODO : report error
ret = -1;
continue;
}
for (int k = 0; k < pBlock->numOfCols; k++) {
// TODO: Check pFields[k] content
pBuf = realloc(pBuf, pFields[k].len);
if (!taosCheckChecksumWhole((uint8_t *)pBuf, pFields[k].len)) {
// TODO : report error;
ret = -1;
continue;
}
}
}
}
tfree(pBuf);
tfree(pFields);
tfree(pBlocks);
tfree(pHeaders);
return ret;
}
int vnodePackDataFile(int vnode, int fileId) {
// TODO: check if it is able to pack current file
// TODO: assign value to headerFile and dataFile
char *headerFile = NULL;
char *dataFile = NULL;
char *lastFile = NULL;
SVnodeObj *pVnode = vnodeList+vnode;
SCompHeader *pHeaders = NULL;
SCompBlock *pBlocks = NULL;
int blockSize = 0;
char *pBuff = 0;
int buffSize = 0;
SCompInfo compInfo;
int size = 0;
int hfd = open(headerFile, O_RDONLY);
if (hfd < 0) {
dError("vid: %d, failed to open header file:%s\n", vnode, headerFile);
return -1;
}
int dfd = open(dataFile, O_RDONLY);
if (dfd < 0) {
dError("vid: %d, failed to open data file:%s\n", vnode, dataFile);
return -1;
}
int lfd = open(lastFile, O_RDONLY);
if (lfd < 0) {
dError("vid: %d, failed to open data file:%s\n", vnode, lastFile);
return -1;
}
lseek(hfd, TSDB_FILE_HEADER_LEN, SEEK_SET);
size = sizeof(SCompHeader)*pVnode->cfg.maxSessions+sizeof(TSCKSUM);
pHeaders = malloc(size);
if (pHeaders == NULL) goto _exit_failure;
read(hfd, pHeaders, size);
if (!taosCheckChecksumWhole((uint8_t *)pHeaders, size)) {
dError("vid: %d, header file %s is broken", vnode, headerFile);
goto _exit_failure;
}
for (size_t i = 0; i < pVnode->cfg.maxSessions; i++)
{
if (pHeaders[i].compInfoOffset <= 0) continue;
SMeterObj *pObj = (SMeterObj *)pVnode->meterList[i];
// read compInfo part
lseek(hfd, pHeaders[i].compInfoOffset, SEEK_SET);
read(hfd, &compInfo, sizeof(SCompInfo));
if (!taosCheckChecksumWhole((uint8_t *)&compInfo, sizeof(SCompInfo))) {
dError("vid: %d sid:%d fileId:%d compInfo is broken", vnode, i, fileId);
goto _exit_failure;
}
// read compBlock part
int tsize = compInfo.numOfBlocks * sizeof(SCompBlock) + sizeof(TSCKSUM);
if (tsize > blockSize) {
if (blockSize == 0) {
pBlocks = malloc(tsize);
} else {
pBlocks = realloc(pBlocks, tsize);
}
blockSize = tsize;
}
read(hfd, pBlocks, tsize);
if (!taosCheckChecksumWhole((uint8_t *)pBlocks, tsize)) {
dError("vid:%d sid:%d fileId:%d block part is broken", vnode, i, fileId);
goto _exit_failure;
}
assert(compInfo.numOfBlocks > 0);
// Loop to scan the blocks and merge block when neccessary.
tsize = sizeof(SCompInfo) + compInfo.numOfBlocks *sizeof(SCompBlock) + sizeof(TSCKSUM);
pBuff = realloc(pBuff, tsize);
SCompInfo *pInfo = (SCompInfo *)pBuff;
SCompBlock *pNBlocks = (SCompBlock *)(pBuff + sizeof(SCompInfo));
int nCounter = 0;
for (int j; j < compInfo.numOfBlocks; j++) {
// TODO : Check if it is the last block
// if (j == compInfo.numOfBlocks - 1) {}
if (pBlocks[j].numOfPoints + pNBlocks[nCounter].numOfPoints <= pObj->pointsPerFileBlock) {
// Merge current block to current new block
} else {
// Write new block to new data file
// pNBlocks[nCounter].
nCounter++;
}
}
}
// release resources on the success path as well
tfree(pHeaders);
tfree(pBlocks);
tfree(pBuff);
close(hfd);
close(dfd);
close(lfd);
return 0;
_exit_failure:
tfree(pHeaders);
tfree(pBlocks);
tfree(pBuff);
if (hfd > 0) close(hfd);
if (dfd > 0) close(dfd);
if (lfd > 0) close(lfd);
return -1;
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "taosmsg.h"
#include "tsqlfunction.h"
#include "vnode.h"
#include "vnodeDataFilterFunc.h"
bool less_i8(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int8_t *)minval < pFilter->filterInfo.upperBndi);
}
bool less_i16(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int16_t *)minval < pFilter->filterInfo.upperBndi);
}
bool less_i32(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int32_t *)minval < pFilter->filterInfo.upperBndi);
}
bool less_i64(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int64_t *)minval < pFilter->filterInfo.upperBndi);
}
bool less_ds(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(float *)minval < pFilter->filterInfo.upperBndd);
}
bool less_dd(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(double *)minval < pFilter->filterInfo.upperBndd);
}
//////////////////////////////////////////////////////////////////
bool large_i8(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int8_t *)maxval > pFilter->filterInfo.lowerBndi);
}
bool large_i16(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int16_t *)maxval > pFilter->filterInfo.lowerBndi);
}
bool large_i32(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int32_t *)maxval > pFilter->filterInfo.lowerBndi);
}
bool large_i64(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int64_t *)maxval > pFilter->filterInfo.lowerBndi);
}
bool large_ds(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(float *)maxval > pFilter->filterInfo.lowerBndd);
}
bool large_dd(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(double *)maxval > pFilter->filterInfo.lowerBndd);
}
/////////////////////////////////////////////////////////////////////
bool lessEqual_i8(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int8_t *)minval <= pFilter->filterInfo.upperBndi);
}
bool lessEqual_i16(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int16_t *)minval <= pFilter->filterInfo.upperBndi);
}
bool lessEqual_i32(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int32_t *)minval <= pFilter->filterInfo.upperBndi);
}
bool lessEqual_i64(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int64_t *)minval <= pFilter->filterInfo.upperBndi);
}
bool lessEqual_ds(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(float *)minval <= pFilter->filterInfo.upperBndd);
}
bool lessEqual_dd(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(double *)minval <= pFilter->filterInfo.upperBndd);
}
//////////////////////////////////////////////////////////////////////////
bool largeEqual_i8(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int8_t *)maxval >= pFilter->filterInfo.lowerBndi);
}
bool largeEqual_i16(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int16_t *)maxval >= pFilter->filterInfo.lowerBndi);
}
bool largeEqual_i32(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int32_t *)maxval >= pFilter->filterInfo.lowerBndi);
}
bool largeEqual_i64(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int64_t *)maxval >= pFilter->filterInfo.lowerBndi);
}
bool largeEqual_ds(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(float *)maxval >= pFilter->filterInfo.lowerBndd);
}
bool largeEqual_dd(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(double *)maxval >= pFilter->filterInfo.lowerBndd);
}
////////////////////////////////////////////////////////////////////////
bool equal_i8(SColumnFilterElem *pFilter, char *minval, char *maxval) {
if (*(int8_t *)minval == *(int8_t *)maxval) {
return (*(int8_t *)minval == pFilter->filterInfo.lowerBndi);
} else { /* range filter */
assert(*(int8_t *)minval < *(int8_t *)maxval);
return *(int8_t *)minval <= pFilter->filterInfo.lowerBndi && *(int8_t *)maxval >= pFilter->filterInfo.lowerBndi;
}
}
bool equal_i16(SColumnFilterElem *pFilter, char *minval, char *maxval) {
if (*(int16_t *)minval == *(int16_t *)maxval) {
return (*(int16_t *)minval == pFilter->filterInfo.lowerBndi);
} else { /* range filter */
assert(*(int16_t *)minval < *(int16_t *)maxval);
return *(int16_t *)minval <= pFilter->filterInfo.lowerBndi && *(int16_t *)maxval >= pFilter->filterInfo.lowerBndi;
}
}
bool equal_i32(SColumnFilterElem *pFilter, char *minval, char *maxval) {
if (*(int32_t *)minval == *(int32_t *)maxval) {
return (*(int32_t *)minval == pFilter->filterInfo.lowerBndi);
} else { /* range filter */
assert(*(int32_t *)minval < *(int32_t *)maxval);
return *(int32_t *)minval <= pFilter->filterInfo.lowerBndi && *(int32_t *)maxval >= pFilter->filterInfo.lowerBndi;
}
}
bool equal_i64(SColumnFilterElem *pFilter, char *minval, char *maxval) {
if (*(int64_t *)minval == *(int64_t *)maxval) {
return (*(int64_t *)minval == pFilter->filterInfo.lowerBndi);
} else { /* range filter */
assert(*(int64_t *)minval < *(int64_t *)maxval);
return *(int64_t *)minval <= pFilter->filterInfo.lowerBndi && *(int64_t *)maxval >= pFilter->filterInfo.lowerBndi;
}
}
bool equal_ds(SColumnFilterElem *pFilter, char *minval, char *maxval) {
if (*(float *)minval == *(float *)maxval) {
return (fabs(*(float *)minval - pFilter->filterInfo.lowerBndd) <= FLT_EPSILON);
} else { /* range filter */
assert(*(float *)minval < *(float *)maxval);
return *(float *)minval <= pFilter->filterInfo.lowerBndd && *(float *)maxval >= pFilter->filterInfo.lowerBndd;
}
}
bool equal_dd(SColumnFilterElem *pFilter, char *minval, char *maxval) {
if (*(double *)minval == *(double *)maxval) {
return (*(double *)minval == pFilter->filterInfo.lowerBndd);
} else { /* range filter */
assert(*(double *)minval < *(double *)maxval);
return *(double *)minval <= pFilter->filterInfo.lowerBndi && *(double *)maxval >= pFilter->filterInfo.lowerBndi;
}
}
bool equal_str(SColumnFilterElem *pFilter, char *minval, char *maxval) {
// if the filter string is longer than the column's maximum length, no data can qualify
if (pFilter->filterInfo.len > pFilter->bytes) {
return false;
}
return strncmp((char *)pFilter->filterInfo.pz, minval, pFilter->bytes) == 0;
}
bool equal_nchar(SColumnFilterElem *pFilter, char *minval, char *maxval) {
// if the filter string is longer than the column's maximum length, no data can qualify
if (pFilter->filterInfo.len > pFilter->bytes) {
return false;
}
return wcsncmp((wchar_t *)pFilter->filterInfo.pz, (wchar_t*) minval, pFilter->bytes/TSDB_NCHAR_SIZE) == 0;
}
////////////////////////////////////////////////////////////////
bool like_str(SColumnFilterElem *pFilter, char *minval, char *maxval) {
SPatternCompareInfo info = PATTERN_COMPARE_INFO_INITIALIZER;
return patternMatch((char *)pFilter->filterInfo.pz, minval, pFilter->bytes, &info) == TSDB_PATTERN_MATCH;
}
bool like_nchar(SColumnFilterElem* pFilter, char* minval, char *maxval) {
SPatternCompareInfo info = PATTERN_COMPARE_INFO_INITIALIZER;
return WCSPatternMatch((wchar_t*) pFilter->filterInfo.pz, (wchar_t*) minval, pFilter->bytes/TSDB_NCHAR_SIZE, &info) == TSDB_PATTERN_MATCH;
}
////////////////////////////////////////////////////////////////
/**
* If minval equals maxval, the pair either represents a single-element filter,
* or all elements of the array are identical during the pre-filter stage.
* Otherwise, [minval, maxval] is the pre-filter range of an array of elements.
*
* During the pre-filter stage, the filter function returns true if at least one
* element may locate in [minval, maxval].
*/
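/*
* Worked example (illustrative): for nequal_i8 with filter value lowerBndi = 5, a block whose
* pre-computed min == max == 5 contains only the value 5 and can be skipped (returns false);
* a block with min = 1, max = 9 may still contain values other than 5, so it must be loaded
* (returns true).
*/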
bool nequal_i8(SColumnFilterElem *pFilter, char *minval, char *maxval) {
if (*(int8_t *)minval == *(int8_t *)maxval) {
return (*(int8_t *)minval != pFilter->filterInfo.lowerBndi);
}
return true;
}
bool nequal_i16(SColumnFilterElem *pFilter, char *minval, char *maxval) {
if (*(int16_t *)minval == *(int16_t *)maxval) {
return (*(int16_t *)minval != pFilter->filterInfo.lowerBndi);
}
return true;
}
bool nequal_i32(SColumnFilterElem *pFilter, char *minval, char *maxval) {
if (*(int32_t *)minval == *(int32_t *)maxval) {
return (*(int32_t *)minval != pFilter->filterInfo.lowerBndi);
}
return true;
}
bool nequal_i64(SColumnFilterElem *pFilter, char *minval, char *maxval) {
if (*(int64_t *)minval == *(int64_t *)maxval) {
return (*(int64_t *)minval != pFilter->filterInfo.lowerBndi);
}
return true;
}
bool nequal_ds(SColumnFilterElem *pFilter, char *minval, char *maxval) {
if (*(float *)minval == *(float *)maxval) {
return (*(float *)minval != pFilter->filterInfo.lowerBndd);
}
return true;
}
bool nequal_dd(SColumnFilterElem *pFilter, char *minval, char *maxval) {
if (*(double *)minval == *(double *)maxval) {
return (*(double *)minval != pFilter->filterInfo.lowerBndd);
}
return true;
}
bool nequal_str(SColumnFilterElem *pFilter, char *minval, char *maxval) {
if (pFilter->filterInfo.len > pFilter->bytes) {
return true;
}
return strncmp((char *)pFilter->filterInfo.pz, minval, pFilter->bytes) != 0;
}
bool nequal_nchar(SColumnFilterElem *pFilter, char* minval, char *maxval) {
if (pFilter->filterInfo.len > pFilter->bytes) {
return true;
}
return wcsncmp((wchar_t *)pFilter->filterInfo.pz, (wchar_t*)minval, pFilter->bytes/TSDB_NCHAR_SIZE) != 0;
}
////////////////////////////////////////////////////////////////
bool rangeFilter_i32_ii(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int32_t *)minval <= pFilter->filterInfo.upperBndi && *(int32_t *)maxval >= pFilter->filterInfo.lowerBndi);
}
bool rangeFilter_i32_ee(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int32_t *)minval<pFilter->filterInfo.upperBndi &&*(int32_t *)maxval> pFilter->filterInfo.lowerBndi);
}
bool rangeFilter_i32_ie(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int32_t *)minval < pFilter->filterInfo.upperBndi && *(int32_t *)maxval >= pFilter->filterInfo.lowerBndi);
}
bool rangeFilter_i32_ei(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int32_t *)minval <= pFilter->filterInfo.upperBndi && *(int32_t *)maxval > pFilter->filterInfo.lowerBndi);
}
///////////////////////////////////////////////////////////////////////////////
bool rangeFilter_i8_ii(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int8_t *)minval <= pFilter->filterInfo.upperBndi && *(int8_t *)maxval >= pFilter->filterInfo.lowerBndi);
}
bool rangeFilter_i8_ee(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int8_t *)minval<pFilter->filterInfo.upperBndi &&*(int8_t *)maxval> pFilter->filterInfo.lowerBndi);
}
bool rangeFilter_i8_ie(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int8_t *)minval < pFilter->filterInfo.upperBndi && *(int8_t *)maxval >= pFilter->filterInfo.lowerBndi);
}
bool rangeFilter_i8_ei(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int8_t *)minval <= pFilter->filterInfo.upperBndi && *(int8_t *)maxval > pFilter->filterInfo.lowerBndi);
}
/////////////////////////////////////////////////////////////////////////////////////
bool rangeFilter_i16_ii(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int16_t *)minval <= pFilter->filterInfo.upperBndi && *(int16_t *)maxval >= pFilter->filterInfo.lowerBndi);
}
bool rangeFilter_i16_ee(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int16_t *)minval<pFilter->filterInfo.upperBndi &&*(int16_t *)maxval> pFilter->filterInfo.lowerBndi);
}
bool rangeFilter_i16_ie(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int16_t *)minval < pFilter->filterInfo.upperBndi && *(int16_t *)maxval >= pFilter->filterInfo.lowerBndi);
}
bool rangeFilter_i16_ei(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int16_t *)minval <= pFilter->filterInfo.upperBndi && *(int16_t *)maxval > pFilter->filterInfo.lowerBndi);
}
////////////////////////////////////////////////////////////////////////
bool rangeFilter_i64_ii(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int64_t *)minval <= pFilter->filterInfo.upperBndi && *(int64_t *)maxval >= pFilter->filterInfo.lowerBndi);
}
bool rangeFilter_i64_ee(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int64_t *)minval<pFilter->filterInfo.upperBndi &&*(int64_t *)maxval> pFilter->filterInfo.lowerBndi);
}
bool rangeFilter_i64_ie(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int64_t *)minval < pFilter->filterInfo.upperBndi && *(int64_t *)maxval >= pFilter->filterInfo.lowerBndi);
}
bool rangeFilter_i64_ei(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(int64_t *)minval <= pFilter->filterInfo.upperBndi && *(int64_t *)maxval > pFilter->filterInfo.lowerBndi);
}
////////////////////////////////////////////////////////////////////////
bool rangeFilter_ds_ii(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(float *)minval <= pFilter->filterInfo.upperBndd && *(float *)maxval >= pFilter->filterInfo.lowerBndd);
}
bool rangeFilter_ds_ee(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(float *)minval<pFilter->filterInfo.upperBndd &&*(float *)maxval> pFilter->filterInfo.lowerBndd);
}
bool rangeFilter_ds_ie(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(float *)minval < pFilter->filterInfo.upperBndd && *(float *)maxval >= pFilter->filterInfo.lowerBndd);
}
bool rangeFilter_ds_ei(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(float *)minval <= pFilter->filterInfo.upperBndd && *(float *)maxval > pFilter->filterInfo.lowerBndd);
}
//////////////////////////////////////////////////////////////////////////
bool rangeFilter_dd_ii(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(double *)minval <= pFilter->filterInfo.upperBndd && *(double *)maxval >= pFilter->filterInfo.lowerBndd);
}
bool rangeFilter_dd_ee(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(double *)minval<pFilter->filterInfo.upperBndd &&*(double *)maxval> pFilter->filterInfo.lowerBndd);
}
bool rangeFilter_dd_ie(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(double *)minval < pFilter->filterInfo.upperBndd && *(double *)maxval >= pFilter->filterInfo.lowerBndd);
}
bool rangeFilter_dd_ei(SColumnFilterElem *pFilter, char *minval, char *maxval) {
return (*(double *)minval <= pFilter->filterInfo.upperBndd && *(double *)maxval > pFilter->filterInfo.lowerBndd);
}
////////////////////////////////////////////////////////////////////////////
bool (*filterFunc_i8[])(SColumnFilterElem *pFilter, char *minval, char *maxval) = {
NULL,
less_i8,
large_i8,
equal_i8,
lessEqual_i8,
largeEqual_i8,
nequal_i8,
NULL,
};
bool (*filterFunc_i16[])(SColumnFilterElem *pFilter, char *minval, char *maxval) = {
NULL,
less_i16,
large_i16,
equal_i16,
lessEqual_i16,
largeEqual_i16,
nequal_i16,
NULL,
};
bool (*filterFunc_i32[])(SColumnFilterElem *pFilter, char *minval, char *maxval) = {
NULL,
less_i32,
large_i32,
equal_i32,
lessEqual_i32,
largeEqual_i32,
nequal_i32,
NULL,
};
bool (*filterFunc_i64[])(SColumnFilterElem *pFilter, char *minval, char *maxval) = {
NULL,
less_i64,
large_i64,
equal_i64,
lessEqual_i64,
largeEqual_i64,
nequal_i64,
NULL,
};
bool (*filterFunc_ds[])(SColumnFilterElem *pFilter, char *minval, char *maxval) = {
NULL,
less_ds,
large_ds,
equal_ds,
lessEqual_ds,
largeEqual_ds,
nequal_ds,
NULL,
};
bool (*filterFunc_dd[])(SColumnFilterElem *pFilter, char *minval, char *maxval) = {
NULL,
less_dd,
large_dd,
equal_dd,
lessEqual_dd,
largeEqual_dd,
nequal_dd,
NULL,
};
bool (*filterFunc_str[])(SColumnFilterElem* pFilter, char* minval, char *maxval) = {
NULL,
NULL,
NULL,
equal_str,
NULL,
NULL,
nequal_str,
like_str,
};
bool (*filterFunc_nchar[])(SColumnFilterElem* pFitler, char* minval, char* maxval) = {
NULL,
NULL,
NULL,
equal_nchar,
NULL,
NULL,
nequal_nchar,
like_nchar,
};
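/*
* Slot order observed in the value-filter arrays above (index 0 unused): 1 -> less, 2 -> greater,
* 3 -> equal, 4 -> less-equal, 5 -> greater-equal, 6 -> not-equal, 7 -> like. Numeric types have
* no "like" entry, and the string/nchar arrays support only ==, != and like.
*/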
bool (*rangeFilterFunc_i8[])(SColumnFilterElem *pFilter, char *minval, char *maxval) = {
NULL,
rangeFilter_i8_ee,
rangeFilter_i8_ie,
rangeFilter_i8_ei,
rangeFilter_i8_ii,
};
bool (*rangeFilterFunc_i16[])(SColumnFilterElem *pFilter, char *minval, char *maxval) = {
NULL,
rangeFilter_i16_ee,
rangeFilter_i16_ie,
rangeFilter_i16_ei,
rangeFilter_i16_ii,
};
bool (*rangeFilterFunc_i32[])(SColumnFilterElem *pFilter, char *minval, char *maxval) = {
NULL,
rangeFilter_i32_ee,
rangeFilter_i32_ie,
rangeFilter_i32_ei,
rangeFilter_i32_ii,
};
bool (*rangeFilterFunc_i64[])(SColumnFilterElem *pFilter, char *minval, char *maxval) = {
NULL,
rangeFilter_i64_ee,
rangeFilter_i64_ie,
rangeFilter_i64_ei,
rangeFilter_i64_ii,
};
bool (*rangeFilterFunc_ds[])(SColumnFilterElem *pFilter, char *minval, char *maxval) = {
NULL,
rangeFilter_ds_ee,
rangeFilter_ds_ie,
rangeFilter_ds_ei,
rangeFilter_ds_ii,
};
bool (*rangeFilterFunc_dd[])(SColumnFilterElem *pFilter, char *minval, char *maxval) = {
NULL,
rangeFilter_dd_ee,
rangeFilter_dd_ie,
rangeFilter_dd_ei,
rangeFilter_dd_ii,
};
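/*
* Range-filter naming: the _ii/_ee/_ie/_ei suffixes describe whether the lower and upper bounds
* are inclusive (i) or exclusive (e). Slot order in the arrays above: 1 -> ee, 2 -> ie, 3 -> ei,
* 4 -> ii (index 0 unused).
*/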
__filter_func_t* vnodeGetRangeFilterFuncArray(int32_t type) {
switch(type) {
case TSDB_DATA_TYPE_BOOL: return rangeFilterFunc_i8;
case TSDB_DATA_TYPE_TINYINT: return rangeFilterFunc_i8;
case TSDB_DATA_TYPE_SMALLINT: return rangeFilterFunc_i16;
case TSDB_DATA_TYPE_INT: return rangeFilterFunc_i32;
case TSDB_DATA_TYPE_TIMESTAMP: //timestamp uses bigint filter
case TSDB_DATA_TYPE_BIGINT: return rangeFilterFunc_i64;
case TSDB_DATA_TYPE_FLOAT: return rangeFilterFunc_ds;
case TSDB_DATA_TYPE_DOUBLE: return rangeFilterFunc_dd;
default:return NULL;
}
}
__filter_func_t* vnodeGetValueFilterFuncArray(int32_t type) {
switch(type) {
case TSDB_DATA_TYPE_BOOL: return filterFunc_i8;
case TSDB_DATA_TYPE_TINYINT: return filterFunc_i8;
case TSDB_DATA_TYPE_SMALLINT: return filterFunc_i16;
case TSDB_DATA_TYPE_INT: return filterFunc_i32;
case TSDB_DATA_TYPE_TIMESTAMP: //timestamp uses bigint filter
case TSDB_DATA_TYPE_BIGINT: return filterFunc_i64;
case TSDB_DATA_TYPE_FLOAT: return filterFunc_ds;
case TSDB_DATA_TYPE_DOUBLE: return filterFunc_dd;
case TSDB_DATA_TYPE_BINARY: return filterFunc_str;
case TSDB_DATA_TYPE_NCHAR: return filterFunc_nchar;
default: return NULL;
}
}
bool vnodeSupportPrefilter(int32_t type) { return type != TSDB_DATA_TYPE_BINARY && type != TSDB_DATA_TYPE_NCHAR; }
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "vnode.h"
#include "vnodeUtil.h"
#include "vnodeStatus.h"
extern void vnodeGetHeadTname(char *nHeadName, char *nLastName, int vnode, int fileId);
extern int vnodeReadColumnToMem(int fd, SCompBlock *pBlock, SField **fields, int col, char *data, int dataSize,
char *temp, char *buffer, int bufferSize);
extern int vnodeSendShellSubmitRspMsg(SShellObj *pObj, int code, int numOfPoints);
extern void vnodeGetHeadDataLname(char *headName, char *dataName, char *lastName, int vnode, int fileId);
extern int vnodeCreateEmptyCompFile(int vnode, int fileId);
extern int vnodeUpdateFreeSlot(SVnodeObj *pVnode);
extern SCacheBlock *vnodeGetFreeCacheBlock(SVnodeObj *pVnode);
extern int vnodeCreateNeccessaryFiles(SVnodeObj *pVnode);
#define KEY_AT_INDEX(payload, step, idx) (*(TSKEY *)((char *)(payload) + (step) * (idx)))
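/*
* KEY_AT_INDEX reads the timestamp of record idx from a row-major payload in which each record
* occupies `step` bytes and starts with its TSKEY, e.g. KEY_AT_INDEX(payload, pObj->bytesPerPoint, 0)
* is the first key of a submit payload.
*/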
typedef struct {
void * signature;
SShellObj *pShell;
SMeterObj *pObj;
int retry;
TSKEY firstKey;
TSKEY lastKey;
int importedRows;
int commit; // start to commit if it is set to 1
int slot; // slot/block to start writing the import data
int pos; // pos to start writing the import data in the slot/block
TSKEY key;
// only for file
int numOfPoints;
int64_t offset; // offset in data file
char * payload;
char * opayload; // allocated space for payload from client
int rows;
} SImportInfo;
typedef struct {
// in .head file
SCompHeader *pHeader;
size_t pHeaderSize;
SCompInfo compInfo;
SCompBlock *pBlocks;
// in .data file
int blockId;
uint8_t blockLoadState;
SField *pField;
size_t pFieldSize;
SData *data[TSDB_MAX_COLUMNS];
char * buffer;
char *temp;
char * tempBuffer;
size_t tempBufferSize;
// Variables for sendfile
int64_t compInfoOffset;
int64_t nextNo0Offset; // next sid whose compInfoOffset > 0
int64_t hfSize;
int64_t driftOffset;
int oldNumOfBlocks;
int newNumOfBlocks;
int last;
} SImportHandle;
typedef struct {
int slot;
int pos;
int oslot; // old slot
TSKEY nextKey;
} SBlockIter;
typedef struct {
int64_t spos;
int64_t epos;
int64_t totalRows;
char * offset[];
} SMergeBuffer;
int vnodeImportData(SMeterObj *pObj, SImportInfo *pImport);
int vnodeFindKeyInCache(SImportInfo *pImport, int order) {
SMeterObj * pObj = pImport->pObj;
int code = 0;
SQuery query;
SCacheInfo *pInfo = (SCacheInfo *)pObj->pCache;
TSKEY key = order ? pImport->firstKey : pImport->lastKey;
memset(&query, 0, sizeof(query));
query.order.order = order;
query.skey = key;
query.ekey = order ? pImport->lastKey : pImport->firstKey;
vnodeSearchPointInCache(pObj, &query);
if (query.slot < 0) {
pImport->slot = pInfo->commitSlot;
if (pInfo->commitPoint >= pObj->pointsPerBlock) pImport->slot = (pImport->slot + 1) % pInfo->maxBlocks;
pImport->pos = 0;
pImport->key = 0;
dTrace("vid:%d sid:%d id:%s, key:%" PRId64 ", import to head of cache", pObj->vnode, pObj->sid, pObj->meterId, key);
code = 0;
} else {
pImport->slot = query.slot;
pImport->pos = query.pos;
pImport->key = query.key;
if (key != query.key) {
if (order == 0) {
// since pos is the position which has smaller key, data shall be imported after it
pImport->pos++;
if (pImport->pos >= pObj->pointsPerBlock) {
pImport->slot = (pImport->slot + 1) % pInfo->maxBlocks;
pImport->pos = 0;
}
} else {
if (pImport->pos < 0) pImport->pos = 0;
}
}
code = 0;
}
return code;
}
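/*
* vnodeGetValidDataRange derives the importable time window from the file span:
* delta = daysPerFile expressed in the vnode's time precision, fid = now / delta, and keys may
* range from (fid - maxFiles + 1) * delta up to (fid + 2) * delta - 1, i.e. from the oldest
* retained file to one file beyond the current one. Illustration (assumed numbers, ms precision):
* daysPerFile = 10 gives delta = 864000000.
*/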
void vnodeGetValidDataRange(int vnode, TSKEY now, TSKEY *minKey, TSKEY *maxKey) {
SVnodeObj *pVnode = vnodeList + vnode;
int64_t delta = pVnode->cfg.daysPerFile * tsMsPerDay[(uint8_t)pVnode->cfg.precision];
int fid = now / delta;
*minKey = (fid - pVnode->maxFiles + 1) * delta;
*maxKey = (fid + 2) * delta - 1;
return;
}
int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, void *param, int sversion,
int *pNumOfPoints, TSKEY now) {
SSubmitMsg *pSubmit = (SSubmitMsg *)cont;
SVnodeObj * pVnode = vnodeList + pObj->vnode;
int rows = 0;
char * payload = NULL;
int code = TSDB_CODE_SUCCESS;
SCachePool *pPool = (SCachePool *)(pVnode->pCachePool);
SShellObj * pShell = (SShellObj *)param;
TSKEY firstKey, lastKey;
payload = pSubmit->payLoad;
rows = htons(pSubmit->numOfRows);
assert(rows > 0);
int expectedLen = rows * pObj->bytesPerPoint + sizeof(pSubmit->numOfRows);
if (expectedLen != contLen) {
dError("vid:%d sid:%d id:%s, invalid import, expected:%d, contLen:%d", pObj->vnode, pObj->sid, pObj->meterId,
expectedLen, contLen);
return TSDB_CODE_WRONG_MSG_SIZE;
}
// Check timestamp context.
TSKEY minKey = 0, maxKey = 0;
firstKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, 0);
lastKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, rows - 1);
assert(firstKey <= lastKey);
vnodeGetValidDataRange(pObj->vnode, now, &minKey, &maxKey);
if (firstKey < minKey || firstKey > maxKey || lastKey < minKey || lastKey > maxKey) {
dError(
"vid:%d sid:%d id:%s, invalid timestamp to import, rows:%d firstKey: %" PRId64 " lastKey: %" PRId64 " minAllowedKey:%" PRId64 " "
"maxAllowedKey:%" PRId64,
pObj->vnode, pObj->sid, pObj->meterId, rows, firstKey, lastKey, minKey, maxKey);
return TSDB_CODE_TIMESTAMP_OUT_OF_RANGE;
}
// forward to peers
if (pShell && pVnode->cfg.replications > 1) {
code = vnodeForwardToPeer(pObj, cont, contLen, TSDB_ACTION_IMPORT, sversion);
if (code != 0) return code;
}
if (pVnode->cfg.commitLog && source != TSDB_DATA_SOURCE_LOG) {
if (pVnode->logFd < 0) return TSDB_CODE_INVALID_COMMIT_LOG;
code = vnodeWriteToCommitLog(pObj, TSDB_ACTION_IMPORT, cont, contLen, sversion);
if (code != 0) return code;
}
/*
* The timestamps of all records in a submit payload are always in ascending order (guaranteed by the client), so only
* the first key needs to be compared with the meter's last key here.
*/
if (firstKey > pObj->lastKey) { // Just call insert
code = vnodeInsertPoints(pObj, cont, contLen, TSDB_DATA_SOURCE_LOG, NULL, sversion, pNumOfPoints, now);
} else { // trigger import
if (sversion != pObj->sversion) {
dError("vid:%d sid:%d id:%s, invalid sversion, expected:%d received:%d", pObj->vnode, pObj->sid, pObj->meterId,
pObj->sversion, sversion);
return TSDB_CODE_OTHERS;
}
// check the table status for perform import historical data
if ((code = vnodeSetMeterInsertImportStateEx(pObj, TSDB_METER_STATE_IMPORTING)) != TSDB_CODE_SUCCESS) {
return code;
}
SImportInfo import = {0};
dTrace("vid:%d sid:%d id:%s, try to import %d rows data, firstKey:%" PRId64 ", lastKey:%" PRId64 ", object lastKey:%" PRId64,
pObj->vnode, pObj->sid, pObj->meterId, rows, firstKey, lastKey, pObj->lastKey);
import.firstKey = firstKey;
import.lastKey = lastKey;
import.pObj = pObj;
import.pShell = pShell;
import.payload = payload;
import.rows = rows;
// FIXME: mutex here seems meaningless and num here still can be changed
int32_t num = 0;
pthread_mutex_lock(&pVnode->vmutex);
num = pObj->numOfQueries;
pthread_mutex_unlock(&pVnode->vmutex);
int32_t commitInProcess = 0;
pthread_mutex_lock(&pPool->vmutex);
if (((commitInProcess = pPool->commitInProcess) == 1) || num > 0) {
// mutual exclusion with read (need to change here)
pthread_mutex_unlock(&pPool->vmutex);
vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING);
return TSDB_CODE_ACTION_IN_PROGRESS;
} else {
pPool->commitInProcess = 1;
pthread_mutex_unlock(&pPool->vmutex);
code = vnodeImportData(pObj, &import);
*pNumOfPoints = import.importedRows;
}
pVnode->version++;
vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING);
}
return code;
}
/* Function to search keys in a range
*
* Assumption: keys in payload are in ascending order
*
* @payload: data records, key in ascending order
* @step: bytes each record takes
* @rows: number of data records
* @skey: range start (included)
* @ekey: range end (included)
* @srows: rtype, start index of records
* @nrows: rtype, number of records in range
*
* @rtype: 0 means find data in the range
* -1 means find no data in the range
*/
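/*
* Illustrative example: payload keys {1, 3, 5, 7}, skey = 2, ekey = 6 gives *srow = 1 (key 3,
* the first key >= skey) and *nrows = 2 (keys 3 and 5); skey = 8 or ekey = 0 returns -1 because
* no key falls in the range.
*/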
static int vnodeSearchKeyInRange(char *payload, int step, int rows, TSKEY skey, TSKEY ekey, int *srow, int *nrows) {
if (rows <= 0 || KEY_AT_INDEX(payload, step, 0) > ekey || KEY_AT_INDEX(payload, step, rows - 1) < skey || skey > ekey)
return -1;
int left = 0;
int right = rows - 1;
int mid;
// Binary search the first key in payload >= skey
do {
mid = (left + right) / 2;
if (skey < KEY_AT_INDEX(payload, step, mid)) {
right = mid;
} else if (skey > KEY_AT_INDEX(payload, step, mid)) {
left = mid + 1;
} else {
break;
}
} while (left < right);
if (skey <= KEY_AT_INDEX(payload, step, mid)) {
*srow = mid;
} else {
if (mid + 1 >= rows) {
return -1;
} else {
*srow = mid + 1;
}
}
assert(skey <= KEY_AT_INDEX(payload, step, *srow));
*nrows = 0;
for (int i = *srow; i < rows; i++) {
if (KEY_AT_INDEX(payload, step, i) <= ekey) {
(*nrows)++;
} else {
break;
}
}
if (*nrows == 0) return -1;
return 0;
}
int vnodeOpenMinFilesForImport(int vnode, int fid) {
char dname[TSDB_FILENAME_LEN] = "\0";
SVnodeObj * pVnode = vnodeList + vnode;
struct stat filestat;
int minFileSize;
minFileSize = TSDB_FILE_HEADER_LEN + sizeof(SCompHeader) * pVnode->cfg.maxSessions + sizeof(TSCKSUM);
vnodeGetHeadDataLname(pVnode->cfn, dname, pVnode->lfn, vnode, fid);
// Open .head file
pVnode->hfd = open(pVnode->cfn, O_RDONLY);
if (pVnode->hfd < 0) {
dError("vid:%d, failed to open head file:%s, reason:%s", vnode, pVnode->cfn, strerror(errno));
taosLogError("vid:%d, failed to open head file:%s, reason:%s", vnode, pVnode->cfn, strerror(errno));
goto _error_open;
}
fstat(pVnode->hfd, &filestat);
if (filestat.st_size < minFileSize) {
dError("vid:%d, head file:%s is corrupted", vnode, pVnode->cfn);
taosLogError("vid:%d, head file:%s corrupted", vnode, pVnode->cfn);
goto _error_open;
}
// Open .data file
pVnode->dfd = open(dname, O_RDWR);
if (pVnode->dfd < 0) {
dError("vid:%d, failed to open data file:%s, reason:%s", vnode, dname, strerror(errno));
taosLogError("vid:%d, failed to open data file:%s, reason:%s", vnode, dname, strerror(errno));
goto _error_open;
}
fstat(pVnode->dfd, &filestat);
if (filestat.st_size < TSDB_FILE_HEADER_LEN) {
dError("vid:%d, data file:%s corrupted", vnode, dname);
taosLogError("vid:%d, data file:%s corrupted", vnode, dname);
goto _error_open;
}
// Open .last file
pVnode->lfd = open(pVnode->lfn, O_RDWR);
if (pVnode->lfd < 0) {
dError("vid:%d, failed to open last file:%s, reason:%s", vnode, pVnode->lfn, strerror(errno));
taosLogError("vid:%d, failed to open last file:%s, reason:%s", vnode, pVnode->lfn, strerror(errno));
goto _error_open;
}
fstat(pVnode->lfd, &filestat);
if (filestat.st_size < TSDB_FILE_HEADER_LEN) {
dError("vid:%d, last file:%s corrupted", vnode, pVnode->lfn);
taosLogError("vid:%d, last file:%s corrupted", vnode, pVnode->lfn);
goto _error_open;
}
return 0;
_error_open:
if (pVnode->hfd > 0) close(pVnode->hfd);
pVnode->hfd = 0;
if (pVnode->dfd > 0) close(pVnode->dfd);
pVnode->dfd = 0;
if (pVnode->lfd > 0) close(pVnode->lfd);
pVnode->lfd = 0;
return -1;
}
/* Function to open .t file and sendfile the first part
*/
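/*
* Sketch of what happens below (derived from the code): the current .head symlink target is read,
* its trailing '0'/'1' is toggled to form the new target, the temporary head name (pVnode->nfn) is
* created as a symlink to that target and opened, the original head file is copied verbatim up to
* this meter's compInfoOffset, and room for a new SCompInfo is reserved so the merged SCompBlock
* list can follow.
*/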
int vnodeOpenTempFilesForImport(SImportHandle *pHandle, SMeterObj *pObj, int fid) {
char dHeadName[TSDB_FILENAME_LEN] = "\0";
SVnodeObj * pVnode = vnodeList + pObj->vnode;
struct stat filestat;
int sid;
// cfn: .head
if (readlink(pVnode->cfn, dHeadName, TSDB_FILENAME_LEN) < 0) return -1;
size_t len = strlen(dHeadName);
// switch head name
switch (dHeadName[len - 1]) {
case '0':
dHeadName[len - 1] = '1';
break;
case '1':
dHeadName[len - 1] = '0';
break;
default:
dError("vid: %d, fid: %d, head target filename not end with 0 or 1", pVnode->vnode, fid);
return -1;
}
vnodeGetHeadTname(pVnode->nfn, NULL, pVnode->vnode, fid);
if (symlink(dHeadName, pVnode->nfn) < 0) return -1;
pVnode->nfd = open(pVnode->nfn, O_RDWR | O_CREAT | O_TRUNC, S_IRWXU | S_IRWXG | S_IRWXO);
if (pVnode->nfd < 0) {
dError("vid:%d, failed to open new head file:%s, reason:%s", pVnode->vnode, pVnode->nfn, strerror(errno));
taosLogError("vid:%d, failed to open new head file:%s, reason:%s", pVnode->vnode, pVnode->nfn, strerror(errno));
return -1;
}
fstat(pVnode->hfd, &filestat);
pHandle->hfSize = filestat.st_size;
// Find the next sid whose compInfoOffset > 0
for (sid = pObj->sid + 1; sid < pVnode->cfg.maxSessions; sid++) {
if (pHandle->pHeader[sid].compInfoOffset > 0) break;
}
pHandle->nextNo0Offset = (sid == pVnode->cfg.maxSessions) ? pHandle->hfSize : pHandle->pHeader[sid].compInfoOffset;
// FIXME: sendfile the original part
// TODO: Here, we need to take the deleted-table case into consideration; this function
// just assumes the case is handled before calling this function
if (pHandle->pHeader[pObj->sid].compInfoOffset > 0) {
pHandle->compInfoOffset = pHandle->pHeader[pObj->sid].compInfoOffset;
} else {
pHandle->compInfoOffset = pHandle->nextNo0Offset;
}
assert(pHandle->compInfoOffset <= pHandle->hfSize);
lseek(pVnode->hfd, 0, SEEK_SET);
lseek(pVnode->nfd, 0, SEEK_SET);
if (tsendfile(pVnode->nfd, pVnode->hfd, NULL, pHandle->compInfoOffset) < 0) {
return -1;
}
// Leave a SCompInfo space here
lseek(pVnode->nfd, sizeof(SCompInfo), SEEK_CUR);
return 0;
}
typedef enum { DATA_LOAD_TIMESTAMP = 0x1, DATA_LOAD_OTHER_DATA = 0x2 } DataLoadMod;
/* Function to load the parts of a file block requested by the load mode (loadMod)
 */
static int vnodeLoadNeededBlockData(SMeterObj *pObj, SImportHandle *pHandle, int blockId, uint8_t loadMod, int *code) {
size_t size;
SCompBlock *pBlock = pHandle->pBlocks + blockId;
*code = TSDB_CODE_SUCCESS;
SVnodeObj *pVnode = vnodeList + pObj->vnode;
int dfd = pBlock->last ? pVnode->lfd : pVnode->dfd;
if (pHandle->blockId != blockId) {
pHandle->blockId = blockId;
pHandle->blockLoadState = 0;
}
if (pHandle->blockLoadState == 0){ // Reload pField
size = sizeof(SField) * pBlock->numOfCols + sizeof(TSCKSUM);
if (pHandle->pFieldSize < size) {
pHandle->pField = (SField *)realloc((void *)(pHandle->pField), size);
if (pHandle->pField == NULL) {
dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid,
pObj->meterId, size);
*code = TSDB_CODE_SERV_OUT_OF_MEMORY;
return -1;
}
pHandle->pFieldSize = size;
}
lseek(dfd, pBlock->offset, SEEK_SET);
if (read(dfd, (void *)(pHandle->pField), pHandle->pFieldSize) < 0) {
dError("vid:%d sid:%d meterId:%s, failed to read data file, size:%zu reason:%s", pVnode->vnode, pObj->sid,
pObj->meterId, pHandle->pFieldSize, strerror(errno));
*code = TSDB_CODE_FILE_CORRUPTED;
return -1;
}
if (!taosCheckChecksumWhole((uint8_t *)(pHandle->pField), pHandle->pFieldSize)) {
dError("vid:%d sid:%d meterId:%s, data file %s is broken since checksum mismatch", pVnode->vnode, pObj->sid,
pObj->meterId, pVnode->lfn);
*code = TSDB_CODE_FILE_CORRUPTED;
return -1;
}
}
{ // Allocate necessary buffer
size = pObj->bytesPerPoint * pObj->pointsPerFileBlock +
(sizeof(SData) + EXTRA_BYTES + sizeof(TSCKSUM)) * pObj->numOfColumns;
if (pHandle->buffer == NULL) {
pHandle->buffer = malloc(size);
if (pHandle->buffer == NULL) {
dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid,
pObj->meterId, size);
*code = TSDB_CODE_SERV_OUT_OF_MEMORY;
return -1;
}
// TODO: Init data
pHandle->data[0] = (SData *)(pHandle->buffer);
for (int col = 1; col < pObj->numOfColumns; col++) {
pHandle->data[col] = (SData *)((char *)(pHandle->data[col - 1]) + sizeof(SData) + EXTRA_BYTES +
sizeof(TSCKSUM) + pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes);
}
}
if (pHandle->temp == NULL) {
pHandle->temp = malloc(size);
if (pHandle->temp == NULL) {
dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid,
pObj->meterId, size);
*code = TSDB_CODE_SERV_OUT_OF_MEMORY;
return -1;
}
}
if (pHandle->tempBuffer == NULL) {
pHandle->tempBufferSize = pObj->maxBytes * pObj->pointsPerFileBlock + EXTRA_BYTES + sizeof(TSCKSUM);
pHandle->tempBuffer = malloc(pHandle->tempBufferSize);
if (pHandle->tempBuffer == NULL) {
dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid,
pObj->meterId, pHandle->tempBufferSize);
*code = TSDB_CODE_SERV_OUT_OF_MEMORY;
return -1;
}
}
}
if ((loadMod & DATA_LOAD_TIMESTAMP) &&
(~(pHandle->blockLoadState & DATA_LOAD_TIMESTAMP))) { // load only timestamp part
if (vnodeReadColumnToMem(dfd, pBlock, &(pHandle->pField), PRIMARYKEY_TIMESTAMP_COL_INDEX,
pHandle->data[PRIMARYKEY_TIMESTAMP_COL_INDEX]->data, sizeof(TSKEY) * pBlock->numOfPoints,
pHandle->temp, pHandle->tempBuffer, pHandle->tempBufferSize) < 0) {
*code = TSDB_CODE_FILE_CORRUPTED;
return -1;
}
pHandle->blockLoadState |= DATA_LOAD_TIMESTAMP;
}
if ((loadMod & DATA_LOAD_OTHER_DATA) && (~(pHandle->blockLoadState & DATA_LOAD_OTHER_DATA))) { // load other columns
for (int col = 1; col < pBlock->numOfCols; col++) {
if (vnodeReadColumnToMem(dfd, pBlock, &(pHandle->pField), col, pHandle->data[col]->data,
pBlock->numOfPoints * pObj->schema[col].bytes, pHandle->temp, pHandle->tempBuffer,
pHandle->tempBufferSize) < 0) {
*code = TSDB_CODE_FILE_CORRUPTED;
return -1;
}
}
pHandle->blockLoadState |= DATA_LOAD_OTHER_DATA;
}
return 0;
}
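/*
* vnodeCloseImportFiles finalizes an import: it writes the updated SCompInfo at compInfoOffset,
* copies the untouched tail of the old head file, shifts every later meter's compInfoOffset by
* driftOffset, rewrites the SCompHeader table with a fresh checksum, then closes all files and
* renames the temporary head file (nfn) over the .head link while removing the old target.
*/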
static int vnodeCloseImportFiles(SMeterObj *pObj, SImportHandle *pHandle) {
SVnodeObj *pVnode = vnodeList + pObj->vnode;
char dpath[TSDB_FILENAME_LEN] = "\0";
SCompInfo compInfo;
#ifdef _ALPINE
off_t offset = 0;
#else
__off_t offset = 0;
#endif
if (pVnode->nfd > 0) {
offset = lseek(pVnode->nfd, 0, SEEK_CUR);
assert(offset == pHandle->nextNo0Offset + pHandle->driftOffset);
{ // Write the SCompInfo part
compInfo.uid = pObj->uid;
compInfo.last = pHandle->last;
compInfo.numOfBlocks = pHandle->newNumOfBlocks + pHandle->oldNumOfBlocks;
compInfo.delimiter = TSDB_VNODE_DELIMITER;
taosCalcChecksumAppend(0, (uint8_t *)(&compInfo), sizeof(SCompInfo));
lseek(pVnode->nfd, pHandle->compInfoOffset, SEEK_SET);
if (twrite(pVnode->nfd, (void *)(&compInfo), sizeof(SCompInfo)) < 0) {
dError("vid:%d sid:%d meterId:%s, failed to wirte SCompInfo, reason:%s", pObj->vnode, pObj->sid, pObj->meterId,
strerror(errno));
return -1;
}
}
// Write the rest of the SCompBlock part
if (pHandle->hfSize > pHandle->nextNo0Offset) {
lseek(pVnode->nfd, 0, SEEK_END);
lseek(pVnode->hfd, pHandle->nextNo0Offset, SEEK_SET);
if (tsendfile(pVnode->nfd, pVnode->hfd, NULL, pHandle->hfSize - pHandle->nextNo0Offset) < 0) {
dError("vid:%d sid:%d meterId:%s, failed to sendfile, size:%" PRId64 ", reason:%s", pObj->vnode, pObj->sid,
pObj->meterId, pHandle->hfSize - pHandle->nextNo0Offset, strerror(errno));
return -1;
}
}
// Write SCompHeader part
pHandle->pHeader[pObj->sid].compInfoOffset = pHandle->compInfoOffset;
for (int sid = pObj->sid + 1; sid < pVnode->cfg.maxSessions; ++sid) {
if (pHandle->pHeader[sid].compInfoOffset > 0) {
pHandle->pHeader[sid].compInfoOffset += pHandle->driftOffset;
}
}
taosCalcChecksumAppend(0, (uint8_t *)(pHandle->pHeader), pHandle->pHeaderSize);
lseek(pVnode->nfd, TSDB_FILE_HEADER_LEN, SEEK_SET);
if (twrite(pVnode->nfd, (void *)(pHandle->pHeader), pHandle->pHeaderSize) < 0) {
dError("vid:%d sid:%d meterId:%s, failed to wirte SCompHeader part, size:%zu, reason:%s", pObj->vnode, pObj->sid,
pObj->meterId, pHandle->pHeaderSize, strerror(errno));
return -1;
}
}
// Close opened files
close(pVnode->dfd);
pVnode->dfd = 0;
close(pVnode->hfd);
pVnode->hfd = 0;
close(pVnode->lfd);
pVnode->lfd = 0;
if (pVnode->nfd > 0) {
close(pVnode->nfd);
pVnode->nfd = 0;
readlink(pVnode->cfn, dpath, TSDB_FILENAME_LEN);
rename(pVnode->nfn, pVnode->cfn);
remove(dpath);
}
return 0;
}
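/*
* vnodeConvertRowsToCols transposes a row-major payload (one record of bytesPerPoint bytes per row)
* into the per-column SData buffers used by the file-block writer, starting at row offset rowOffset.
* For example, column 0 (the timestamps) of row r lands at data[0]->data + (r + rowOffset) * pObj->schema[0].bytes.
*/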
static void vnodeConvertRowsToCols(SMeterObj *pObj, const char *payload, int rows, SData *data[], int rowOffset) {
int sdataRow;
int offset;
for (int row = 0; row < rows; ++row) {
sdataRow = row + rowOffset;
offset = 0;
for (int col = 0; col < pObj->numOfColumns; ++col) {
memcpy(data[col]->data + sdataRow * pObj->schema[col].bytes, payload + pObj->bytesPerPoint * row + offset,
pObj->schema[col].bytes);
offset += pObj->schema[col].bytes;
}
}
}
static int vnodeMergeDataIntoFile(SImportInfo *pImport, const char *payload, int rows, int fid) {
SMeterObj * pObj = (SMeterObj *)(pImport->pObj);
SVnodeObj * pVnode = vnodeList + pObj->vnode;
SImportHandle importHandle;
size_t size = 0;
SData * data[TSDB_MAX_COLUMNS];
char * buffer = NULL;
SData * cdata[TSDB_MAX_COLUMNS];
char * cbuffer = NULL;
SCompBlock compBlock;
TSCKSUM checksum = 0;
int pointsImported = 0;
int code = TSDB_CODE_SUCCESS;
SCachePool * pPool = (SCachePool *)pVnode->pCachePool;
SCacheInfo * pInfo = (SCacheInfo *)(pObj->pCache);
TSKEY lastKeyImported = 0;
TSKEY delta = pVnode->cfg.daysPerFile * tsMsPerDay[(uint8_t)pVnode->cfg.precision];
TSKEY minFileKey = fid * delta;
TSKEY maxFileKey = minFileKey + delta - 1;
TSKEY firstKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, 0);
TSKEY lastKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, rows - 1);
assert(firstKey >= minFileKey && firstKey <= maxFileKey && lastKey >= minFileKey && lastKey <= maxFileKey);
// create necessary files
pVnode->commitFirstKey = firstKey;
if (vnodeCreateNeccessaryFiles(pVnode) < 0) return TSDB_CODE_OTHERS;
assert(pVnode->commitFileId == fid);
// Open the minimum set of files needed for import: .head(hfd) .data(dfd) .last(lfd)
if (vnodeOpenMinFilesForImport(pObj->vnode, fid) < 0) return TSDB_CODE_FILE_CORRUPTED;
memset(&importHandle, 0, sizeof(SImportHandle));
{ // Load SCompHeader part from .head file
importHandle.pHeaderSize = sizeof(SCompHeader) * pVnode->cfg.maxSessions + sizeof(TSCKSUM);
importHandle.pHeader = (SCompHeader *)malloc(importHandle.pHeaderSize);
if (importHandle.pHeader == NULL) {
dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid,
pObj->meterId, importHandle.pHeaderSize);
code = TSDB_CODE_SERV_OUT_OF_MEMORY;
goto _error_merge;
}
lseek(pVnode->hfd, TSDB_FILE_HEADER_LEN, SEEK_SET);
if (read(pVnode->hfd, (void *)(importHandle.pHeader), importHandle.pHeaderSize) < importHandle.pHeaderSize) {
dError("vid: %d, sid: %d, meterId: %s, fid: %d failed to read SCompHeader part, reason:%s", pObj->vnode,
pObj->sid, pObj->meterId, fid, strerror(errno));
code = TSDB_CODE_FILE_CORRUPTED;
goto _error_merge;
}
if (!taosCheckChecksumWhole((uint8_t *)(importHandle.pHeader), importHandle.pHeaderSize)) {
dError("vid: %d, sid: %d, meterId: %s, fid: %d SCompHeader part is broken", pObj->vnode, pObj->sid, pObj->meterId,
fid);
code = TSDB_CODE_FILE_CORRUPTED;
goto _error_merge;
}
}
{ // Initialize data[] and cdata[], which is used to hold data to write to data file
size = pObj->bytesPerPoint * pVnode->cfg.rowsInFileBlock + (sizeof(SData) + EXTRA_BYTES + sizeof(TSCKSUM)) * pObj->numOfColumns;
buffer = (char *)malloc(size);
if (buffer == NULL) {
dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid,
pObj->meterId, size);
code = TSDB_CODE_SERV_OUT_OF_MEMORY;
goto _error_merge;
}
cbuffer = (char *)malloc(size);
if (cbuffer == NULL) {
dError("vid: %d, sid: %d, meterId: %s, failed to allocate memory, size: %ul", pObj->vnode, pObj->sid,
pObj->meterId, size);
code = TSDB_CODE_SERV_OUT_OF_MEMORY;
goto _error_merge;
}
data[0] = (SData *)buffer;
cdata[0] = (SData *)cbuffer;
for (int col = 1; col < pObj->numOfColumns; col++) {
data[col] = (SData *)((char *)data[col - 1] + sizeof(SData) + EXTRA_BYTES + sizeof(TSCKSUM) +
pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes);
cdata[col] = (SData *)((char *)cdata[col - 1] + sizeof(SData) + EXTRA_BYTES + sizeof(TSCKSUM) +
pObj->pointsPerFileBlock * pObj->schema[col - 1].bytes);
}
}
if (importHandle.pHeader[pObj->sid].compInfoOffset == 0) { // No data in this file, just write it
_write_empty_point:
if (vnodeOpenTempFilesForImport(&importHandle, pObj, fid) < 0) {
code = TSDB_CODE_OTHERS;
goto _error_merge;
}
importHandle.oldNumOfBlocks = 0;
importHandle.driftOffset += sizeof(SCompInfo);
lastKeyImported = lastKey;
for (int rowsWritten = 0; rowsWritten < rows;) {
int rowsToWrite = MIN(pVnode->cfg.rowsInFileBlock, (rows - rowsWritten) /* the rows left */);
vnodeConvertRowsToCols(pObj, payload + rowsWritten * pObj->bytesPerPoint, rowsToWrite, data, 0);
pointsImported += rowsToWrite;
compBlock.last = 1;
if (vnodeWriteBlockToFile(pObj, &compBlock, data, cdata, rowsToWrite) < 0) {
// TODO: deal with ERROR here
}
importHandle.last = compBlock.last;
checksum = taosCalcChecksum(checksum, (uint8_t *)(&compBlock), sizeof(SCompBlock));
twrite(pVnode->nfd, &compBlock, sizeof(SCompBlock));
importHandle.newNumOfBlocks++;
importHandle.driftOffset += sizeof(SCompBlock);
rowsWritten += rowsToWrite;
}
twrite(pVnode->nfd, &checksum, sizeof(TSCKSUM));
importHandle.driftOffset += sizeof(TSCKSUM);
} else { // Else if there are old data in this file.
{ // load SCompInfo and SCompBlock part
lseek(pVnode->hfd, importHandle.pHeader[pObj->sid].compInfoOffset, SEEK_SET);
if (read(pVnode->hfd, (void *)(&(importHandle.compInfo)), sizeof(SCompInfo)) < sizeof(SCompInfo)) {
dError("vid:%d sid:%d meterId:%s, failed to read .head file, reason:%s", pVnode->vnode, pObj->sid,
pObj->meterId, strerror(errno));
code = TSDB_CODE_FILE_CORRUPTED;
goto _error_merge;
}
if ((importHandle.compInfo.delimiter != TSDB_VNODE_DELIMITER) ||
(!taosCheckChecksumWhole((uint8_t *)(&(importHandle.compInfo)), sizeof(SCompInfo)))) {
dError("vid:%d sid:%d meterId:%s, .head file %s is broken, delemeter:%x", pVnode->vnode, pObj->sid,
pObj->meterId, pVnode->cfn, importHandle.compInfo.delimiter);
code = TSDB_CODE_FILE_CORRUPTED;
goto _error_merge;
}
// Check the context of SCompInfo part
if (importHandle.compInfo.uid != pObj->uid) { // The data belongs to the other meter
goto _write_empty_point;
}
importHandle.oldNumOfBlocks = importHandle.compInfo.numOfBlocks;
importHandle.last = importHandle.compInfo.last;
size = sizeof(SCompBlock) * importHandle.compInfo.numOfBlocks + sizeof(TSCKSUM);
importHandle.pBlocks = (SCompBlock *)malloc(size);
if (importHandle.pBlocks == NULL) {
dError("vid:%d sid:%d meterId:%s, failed to allocate importHandle.pBlock, size:%ul", pVnode->vnode, pObj->sid,
pObj->meterId, size);
code = TSDB_CODE_SERV_OUT_OF_MEMORY;
goto _error_merge;
}
if (read(pVnode->hfd, (void *)(importHandle.pBlocks), size) < size) {
dError("vid:%d sid:%d meterId:%s, failed to read importHandle.pBlock, reason:%s", pVnode->vnode, pObj->sid,
pObj->meterId, strerror(errno));
code = TSDB_CODE_FILE_CORRUPTED;
goto _error_merge;
}
if (!taosCheckChecksumWhole((uint8_t *)(importHandle.pBlocks), size)) {
dError("vid:%d sid:%d meterId:%s, pBlock part is broken in %s", pVnode->vnode, pObj->sid, pObj->meterId,
pVnode->cfn);
code = TSDB_CODE_FILE_CORRUPTED;
goto _error_merge;
}
}
/* Now we have _payload_, we have _importHandle.pBlocks_, just merge payload into the importHandle.pBlocks
*
* Input: payload, pObj->bytesPerBlock, rows, importHandle.pBlocks
*/
{
int payloadIter = 0;
SBlockIter blockIter = {0, 0, 0, 0};
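      // blockIter tracks the merge position in the existing block list: slot/pos are the current
      // block and the row inside it, oslot marks the first old block not yet flushed to the .t file,
      // and nextKey is the smallest key that no longer belongs before the current position
      // (field meanings inferred from their use below).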
while (1) {
if (payloadIter >= rows) { // payload end, break
// write the remaining blocks to the file
if (pVnode->nfd > 0) {
int blocksLeft = importHandle.compInfo.numOfBlocks - blockIter.oslot;
if (blocksLeft > 0) {
checksum = taosCalcChecksum(checksum, (uint8_t *)(importHandle.pBlocks + blockIter.oslot),
sizeof(SCompBlock) * blocksLeft);
if (twrite(pVnode->nfd, (void *)(importHandle.pBlocks + blockIter.oslot),
sizeof(SCompBlock) * blocksLeft) < 0) {
dError("vid:%d sid:%d meterId:%s, failed to write %s file, size:%ul, reason:%s", pVnode->vnode,
pObj->sid, pObj->meterId, pVnode->nfn, sizeof(SCompBlock) * blocksLeft, strerror(errno));
code = TSDB_CODE_OTHERS;
goto _error_merge;
}
}
if (twrite(pVnode->nfd, (void *)(&checksum), sizeof(TSCKSUM)) < 0) {
dError("vid:%d sid:%d meterId:%s, failed to write %s file, size:%ul, reason:%s", pVnode->vnode, pObj->sid,
pObj->meterId, pVnode->nfn, sizeof(TSCKSUM), strerror(errno));
code = TSDB_CODE_OTHERS;
goto _error_merge;
}
}
break;
}
if (blockIter.slot >= importHandle.compInfo.numOfBlocks) { // blocks end, break
// Should never come here
assert(false);
}
TSKEY key = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter);
{ // Binary search the (slot, pos) which is >= key as well as nextKey
int left = blockIter.slot;
int right = importHandle.compInfo.numOfBlocks - 1;
TSKEY minKey = importHandle.pBlocks[left].keyFirst;
TSKEY maxKey = importHandle.pBlocks[right].keyLast;
assert(minKey <= maxKey);
if (key < minKey) { // Case 1. write just ahead the blockIter.slot
blockIter.slot = left;
blockIter.pos = 0;
blockIter.nextKey = minKey;
} else if (key > maxKey) { // Case 2. write to the end
if (importHandle.pBlocks[right].last) { // Case 2.1 last block in .last file, need to merge
assert(importHandle.last != 0);
importHandle.last = 0;
blockIter.slot = right;
blockIter.pos = importHandle.pBlocks[right].numOfPoints;
} else { // Case 2.2 just write after the last block
blockIter.slot = right + 1;
blockIter.pos = 0;
}
blockIter.nextKey = maxFileKey + 1;
} else { // Case 3. need to search the block for slot and pos
if (key == minKey || key == maxKey) {
if (tsAffectedRowsMod) pointsImported++;
payloadIter++;
continue;
}
// Here: minKey < key < maxKey
int mid;
TSKEY blockMinKey;
TSKEY blockMaxKey;
// Binary search the slot
do {
mid = (left + right) / 2;
blockMinKey = importHandle.pBlocks[mid].keyFirst;
blockMaxKey = importHandle.pBlocks[mid].keyLast;
assert(blockMinKey <= blockMaxKey);
if (key < blockMinKey) {
right = mid;
} else if (key > blockMaxKey) {
left = mid + 1;
} else { /* blockMinKey <= key <= blockMaxKey */
break;
}
} while (left < right);
if (key == blockMinKey || key == blockMaxKey) { // duplicate key
if (tsAffectedRowsMod) pointsImported++;
payloadIter++;
continue;
}
// Get the slot
if (key > blockMaxKey) { /* pos = 0 or pos = ? */
blockIter.slot = mid + 1;
} else { /* key < blockMinKey (pos = 0) || (key > blockMinKey && key < blockMaxKey) (pos=?) */
blockIter.slot = mid;
}
// Get the pos
assert(blockIter.slot < importHandle.compInfo.numOfBlocks);
if (key == importHandle.pBlocks[blockIter.slot].keyFirst ||
key == importHandle.pBlocks[blockIter.slot].keyLast) {
if (tsAffectedRowsMod) pointsImported++;
payloadIter++;
continue;
}
assert(key < importHandle.pBlocks[blockIter.slot].keyLast);
/* */
if (key < importHandle.pBlocks[blockIter.slot].keyFirst) {
blockIter.pos = 0;
blockIter.nextKey = importHandle.pBlocks[blockIter.slot].keyFirst;
} else {
SCompBlock *pBlock = importHandle.pBlocks + blockIter.slot;
if (pBlock->sversion != pObj->sversion) { /*TODO*/
}
if (vnodeLoadNeededBlockData(pObj, &importHandle, blockIter.slot, DATA_LOAD_TIMESTAMP, &code) < 0) {
goto _error_merge;
}
int pos = (*vnodeSearchKeyFunc[pObj->searchAlgorithm])(
importHandle.data[PRIMARYKEY_TIMESTAMP_COL_INDEX]->data, pBlock->numOfPoints, key, TSQL_SO_ASC);
assert(pos != 0);
if (KEY_AT_INDEX(importHandle.data[PRIMARYKEY_TIMESTAMP_COL_INDEX]->data, sizeof(TSKEY), pos) == key) {
if (tsAffectedRowsMod) pointsImported++;
payloadIter++;
continue;
}
blockIter.pos = pos;
blockIter.nextKey = (blockIter.slot + 1 < importHandle.compInfo.numOfBlocks)
? importHandle.pBlocks[blockIter.slot + 1].keyFirst
: maxFileKey + 1;
// Need to merge with this block
if (importHandle.pBlocks[blockIter.slot].last) { // this is to merge with the last block
assert((blockIter.slot == (importHandle.compInfo.numOfBlocks - 1)));
importHandle.last = 0;
}
}
}
}
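        // A block written under a different schema version cannot be merged here; the import is
        // aborted rather than attempting an on-the-fly schema conversion.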
int aslot = MIN(blockIter.slot, importHandle.compInfo.numOfBlocks - 1);
int64_t sversion = importHandle.pBlocks[aslot].sversion;
if (sversion != pObj->sversion) {
code = TSDB_CODE_OTHERS;
goto _error_merge;
}
// Open the new .t file if not opened yet.
if (pVnode->nfd <= 0) {
if (vnodeOpenTempFilesForImport(&importHandle, pObj, fid) < 0) {
code = TSDB_CODE_OTHERS;
goto _error_merge;
}
}
if (blockIter.slot > blockIter.oslot) { // write blocks in range [blockIter.oslot, blockIter.slot) to .t file
checksum = taosCalcChecksum(checksum, (uint8_t *)(importHandle.pBlocks + blockIter.oslot),
sizeof(SCompBlock) * (blockIter.slot - blockIter.oslot));
if (twrite(pVnode->nfd, (void *)(importHandle.pBlocks + blockIter.oslot),
sizeof(SCompBlock) * (blockIter.slot - blockIter.oslot)) < 0) {
dError("vid:%d sid:%d meterId:%s, failed to write %s file, size:%ul, reason:%s", pVnode->vnode, pObj->sid,
pObj->meterId, pVnode->nfn, sizeof(SCompBlock) * (blockIter.slot - blockIter.oslot),
strerror(errno));
code = TSDB_CODE_OTHERS;
goto _error_merge;
}
blockIter.oslot = blockIter.slot;
}
if (blockIter.pos == 0) { // No need to merge
// copy payload part to data
int rowOffset = 0;
for (; payloadIter < rows; rowOffset++) {
if (KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) >= blockIter.nextKey) break;
vnodeConvertRowsToCols(pObj, payload + pObj->bytesPerPoint * payloadIter, 1, data, rowOffset);
pointsImported++;
lastKeyImported = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter);
payloadIter++;
}
// write directly to .data file
compBlock.last = 0;
if (vnodeWriteBlockToFile(pObj, &compBlock, data, cdata, rowOffset) < 0) {
// TODO: Deal with the ERROR here
}
checksum = taosCalcChecksum(checksum, (uint8_t *)(&compBlock), sizeof(SCompBlock));
if (twrite(pVnode->nfd, &compBlock, sizeof(SCompBlock)) < 0) {
// TODO : deal with the ERROR here
}
importHandle.newNumOfBlocks++;
importHandle.driftOffset += sizeof(SCompBlock);
} else { // Merge block and payload from payloadIter
if (vnodeLoadNeededBlockData(pObj, &importHandle, blockIter.slot,
                                       DATA_LOAD_TIMESTAMP | DATA_LOAD_OTHER_DATA, &code) < 0) { // Load necessary blocks
goto _error_merge;
}
importHandle.oldNumOfBlocks--;
importHandle.driftOffset -= sizeof(SCompBlock);
int rowOffset = blockIter.pos; // counter for data
// Copy the front part
for (int col = 0; col < pObj->numOfColumns; col++) {
memcpy((void *)(data[col]->data), (void *)(importHandle.data[col]->data),
pObj->schema[col].bytes * blockIter.pos);
}
// Merge part
while (1) {
if (rowOffset >= pVnode->cfg.rowsInFileBlock) { // data full in a block to commit
compBlock.last = 0;
if (vnodeWriteBlockToFile(pObj, &compBlock, data, cdata, rowOffset) < 0) {
// TODO : deal with the ERROR here
}
checksum = taosCalcChecksum(checksum, (uint8_t *)(&compBlock), sizeof(SCompBlock));
if (twrite(pVnode->nfd, (void *)(&compBlock), sizeof(SCompBlock)) < 0) {
dError("vid:%d sid:%d meterId:%s, failed to write %s file, size:%ul, reason:%s", pVnode->vnode,
pObj->sid, pObj->meterId, pVnode->nfn, sizeof(SCompBlock), strerror(errno));
goto _error_merge;
}
importHandle.newNumOfBlocks++;
importHandle.driftOffset += sizeof(SCompBlock);
rowOffset = 0;
}
if ((payloadIter >= rows || KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) >= blockIter.nextKey) &&
blockIter.pos >= importHandle.pBlocks[blockIter.slot].numOfPoints)
break;
if (payloadIter >= rows ||
KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) >= blockIter.nextKey) { // payload end
for (int col = 0; col < pObj->numOfColumns; col++) {
memcpy(data[col]->data + rowOffset * pObj->schema[col].bytes,
importHandle.data[col]->data + pObj->schema[col].bytes * blockIter.pos, pObj->schema[col].bytes);
}
blockIter.pos++;
rowOffset++;
} else if (blockIter.pos >= importHandle.pBlocks[blockIter.slot].numOfPoints) { // block end
vnodeConvertRowsToCols(pObj, payload + pObj->bytesPerPoint * payloadIter, 1, data, rowOffset);
pointsImported++;
lastKeyImported = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter);
payloadIter++;
rowOffset++;
} else {
if (KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) ==
KEY_AT_INDEX(importHandle.data[PRIMARYKEY_TIMESTAMP_COL_INDEX]->data, sizeof(TSKEY),
blockIter.pos)) { // duplicate key
if (tsAffectedRowsMod) pointsImported++;
payloadIter++;
continue;
} else if (KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) <
KEY_AT_INDEX(importHandle.data[PRIMARYKEY_TIMESTAMP_COL_INDEX]->data, sizeof(TSKEY),
blockIter.pos)) {
vnodeConvertRowsToCols(pObj, payload + pObj->bytesPerPoint * payloadIter, 1, data, rowOffset);
pointsImported++;
lastKeyImported = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter);
payloadIter++;
rowOffset++;
} else {
for (int col = 0; col < pObj->numOfColumns; col++) {
memcpy(data[col]->data + rowOffset * pObj->schema[col].bytes,
importHandle.data[col]->data + pObj->schema[col].bytes * blockIter.pos,
pObj->schema[col].bytes);
}
blockIter.pos++;
rowOffset++;
}
}
}
          if (rowOffset > 0) { // flush the remaining rows as one more block
compBlock.last = 0;
if (vnodeWriteBlockToFile(pObj, &compBlock, data, cdata, rowOffset) < 0) {
// TODO : deal with the ERROR here
}
checksum = taosCalcChecksum(checksum, (uint8_t *)(&compBlock), sizeof(SCompBlock));
if (twrite(pVnode->nfd, (void *)(&compBlock), sizeof(SCompBlock)) < 0) {
dError("vid:%d sid:%d meterId:%s, failed to write %s file, size:%ul, reason:%s", pVnode->vnode, pObj->sid,
pObj->meterId, pVnode->nfn, sizeof(SCompBlock), strerror(errno));
goto _error_merge;
}
importHandle.newNumOfBlocks++;
importHandle.driftOffset += sizeof(SCompBlock);
rowOffset = 0;
}
blockIter.slot++;
blockIter.oslot = blockIter.slot;
}
}
}
}
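  // The merge for this file id is complete; vnodeCloseImportFiles below is expected to write the
  // final SCompInfo and finalize the temporary files (assumed from its name and the error handling).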
// Write the SCompInfo part
if (vnodeCloseImportFiles(pObj, &importHandle) < 0) {
code = TSDB_CODE_OTHERS;
goto _error_merge;
}
pImport->importedRows += pointsImported;
pthread_mutex_lock(&(pPool->vmutex));
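  // If the imported keys overlap the committed portion of the cache, those committed cache blocks
  // are released below, presumably so readers go back to the rebuilt data file instead of an
  // outdated cached copy (interpretation of the slot walk that follows).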
if (pInfo->numOfBlocks > 0) {
int slot = (pInfo->currentSlot - pInfo->numOfBlocks + 1 + pInfo->maxBlocks) % pInfo->maxBlocks;
TSKEY firstKeyInCache = *((TSKEY *)(pInfo->cacheBlocks[slot]->offset[0]));
    // data may already be in the committed cache; those cache blocks shall be released
if (lastKeyImported > firstKeyInCache) {
while (slot != pInfo->commitSlot) {
SCacheBlock *pCacheBlock = pInfo->cacheBlocks[slot];
vnodeFreeCacheBlock(pCacheBlock);
slot = (slot + 1 + pInfo->maxBlocks) % pInfo->maxBlocks;
}
if (pInfo->commitPoint == pObj->pointsPerBlock) {
if (pInfo->cacheBlocks[pInfo->commitSlot]->pMeterObj == pObj) {
vnodeFreeCacheBlock(pInfo->cacheBlocks[pInfo->commitSlot]);
}
}
}
}
pthread_mutex_unlock(&(pPool->vmutex));
// TODO: free the allocated memory
tfree(buffer);
tfree(cbuffer);
tfree(importHandle.pHeader);
tfree(importHandle.pBlocks);
tfree(importHandle.pField);
tfree(importHandle.buffer);
tfree(importHandle.temp);
tfree(importHandle.tempBuffer);
return code;
_error_merge:
tfree(buffer);
tfree(cbuffer);
tfree(importHandle.pHeader);
tfree(importHandle.pBlocks);
tfree(importHandle.pField);
tfree(importHandle.buffer);
tfree(importHandle.temp);
tfree(importHandle.tempBuffer);
close(pVnode->dfd);
pVnode->dfd = 0;
close(pVnode->hfd);
pVnode->hfd = 0;
close(pVnode->lfd);
pVnode->lfd = 0;
if (pVnode->nfd > 0) {
close(pVnode->nfd);
pVnode->nfd = 0;
remove(pVnode->nfn);
}
return code;
}
#define FORWARD_ITER(iter, step, slotLimit, posLimit) \
{ \
if ((iter.pos) + (step) < (posLimit)) { \
(iter.pos) = (iter.pos) + (step); \
} else { \
(iter.pos) = 0; \
(iter.slot) = ((iter.slot) + 1) % (slotLimit); \
} \
}
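// FORWARD_ITER advances a cache iterator by `step` rows, rolling over to the next cache slot
// when the current block is exhausted. The call below passes step=1, so the reset-to-zero
// branch never skips rows. Example with hypothetical values: posLimit=4 and iter={slot=2,pos=3}
// gives iter={slot=3 % slotLimit, pos=0} after FORWARD_ITER(iter, 1, slotLimit, 4).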
int isCacheEnd(SBlockIter iter, SMeterObj *pTable) {
SCacheInfo *pInfo = (SCacheInfo *)(pTable->pCache);
int slot = 0;
int pos = 0;
if (pInfo->cacheBlocks[pInfo->currentSlot]->numOfPoints == pTable->pointsPerBlock) {
slot = (pInfo->currentSlot + 1) % (pInfo->maxBlocks);
pos = 0;
} else {
slot = pInfo->currentSlot;
pos = pInfo->cacheBlocks[pInfo->currentSlot]->numOfPoints;
}
return ((iter.slot == slot) && (iter.pos == pos));
}
static void vnodeFlushMergeBuffer(SMergeBuffer *pBuffer, SBlockIter *pWriteIter, SBlockIter *pCacheIter,
SMeterObj *pObj, SCacheInfo *pInfo, int checkBound) {
// Function to flush the merge buffer data to cache
if (pWriteIter->pos == pObj->pointsPerBlock) {
pWriteIter->pos = 0;
pWriteIter->slot = (pWriteIter->slot + 1) % pInfo->maxBlocks;
}
while (pBuffer->spos != pBuffer->epos) {
if (checkBound && pWriteIter->slot == pCacheIter->slot && pWriteIter->pos == pCacheIter->pos) break;
for (int col = 0; col < pObj->numOfColumns; col++) {
memcpy(pInfo->cacheBlocks[pWriteIter->slot]->offset[col] + pObj->schema[col].bytes * pWriteIter->pos,
pBuffer->offset[col] + pObj->schema[col].bytes * pBuffer->spos, pObj->schema[col].bytes);
}
if (pWriteIter->pos + 1 < pObj->pointsPerBlock) {
(pWriteIter->pos)++;
} else {
pInfo->cacheBlocks[pWriteIter->slot]->numOfPoints = pWriteIter->pos + 1;
pWriteIter->slot = (pWriteIter->slot + 1) % pInfo->maxBlocks;
pWriteIter->pos = 0;
}
pBuffer->spos = (pBuffer->spos + 1) % pBuffer->totalRows;
}
if ((!checkBound) && pWriteIter->pos != 0) {
pInfo->cacheBlocks[pWriteIter->slot]->numOfPoints = pWriteIter->pos;
}
}
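// vnodeImportDataToCache merges `rows` rows of `payload` (all newer than lastKeyOnFile) into the
// in-memory cache: starting from the first importable key it repeatedly copies the older of
// (next payload row, next cached row) into a temporary ring buffer (SMergeBuffer) and flushes
// that buffer back into cache blocks, allocating new blocks whenever the current one fills up.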
int vnodeImportDataToCache(SImportInfo *pImport, const char *payload, const int rows) {
SMeterObj * pObj = pImport->pObj;
SVnodeObj * pVnode = vnodeList + pObj->vnode;
int code = -1;
SCacheInfo * pInfo = (SCacheInfo *)(pObj->pCache);
int payloadIter;
SCachePool * pPool = (SCachePool *)(pVnode->pCachePool);
int isCacheIterEnd = 0;
int spayloadIter = 0;
int isAppendData = 0;
int rowsImported = 0;
int totalRows = 0;
size_t size = 0;
SMergeBuffer *pBuffer = NULL;
TSKEY firstKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, 0);
TSKEY lastKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, rows - 1);
assert(firstKey <= lastKey && firstKey > pObj->lastKeyOnFile);
// TODO: make this condition less strict
if (pObj->freePoints < rows || pObj->freePoints < (pObj->pointsPerBlock << 1)) { // No free room to hold the data
dError("vid:%d sid:%d id:%s, import failed, cache is full, freePoints:%d", pObj->vnode, pObj->sid, pObj->meterId,
pObj->freePoints);
pImport->importedRows = 0;
pImport->commit = 1;
code = TSDB_CODE_ACTION_IN_PROGRESS;
return code;
}
if (pInfo->numOfBlocks == 0) {
if (vnodeAllocateCacheBlock(pObj) < 0) {
pImport->importedRows = 0;
pImport->commit = 1;
code = TSDB_CODE_ACTION_IN_PROGRESS;
return code;
}
}
// Find the first importable record from payload
pImport->lastKey = lastKey;
for (payloadIter = 0; payloadIter < rows; payloadIter++) {
TSKEY key = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter);
if (key == pObj->lastKey) {
if (tsAffectedRowsMod) rowsImported++;
continue;
}
if (key > pObj->lastKey) { // Just as insert
pImport->slot = pInfo->currentSlot;
pImport->pos = pInfo->cacheBlocks[pImport->slot]->numOfPoints;
isCacheIterEnd = 1;
break;
} else {
pImport->firstKey = key;
if (vnodeFindKeyInCache(pImport, 1) < 0) {
goto _exit;
}
if (pImport->firstKey != pImport->key) break;
if (tsAffectedRowsMod) rowsImported++;
}
}
if (payloadIter == rows) {
pImport->importedRows += rowsImported;
code = 0;
goto _exit;
}
spayloadIter = payloadIter;
if (pImport->pos == pObj->pointsPerBlock) assert(isCacheIterEnd);
  // Allocate a new merge buffer to serve as the merge workspace
totalRows = pObj->pointsPerBlock + rows - payloadIter + 1;
size = sizeof(SMergeBuffer) + sizeof(char *) * pObj->numOfColumns + pObj->bytesPerPoint * totalRows;
pBuffer = (SMergeBuffer *)malloc(size);
if (pBuffer == NULL) {
dError("vid:%d sid:%d meterId:%s, failed to allocate memory, size:%d", pObj->vnode, pObj->sid, pObj->meterId, size);
return TSDB_CODE_SERV_OUT_OF_MEMORY;
}
pBuffer->spos = 0;
pBuffer->epos = 0;
pBuffer->totalRows = totalRows;
pBuffer->offset[0] = (char *)pBuffer + sizeof(SMergeBuffer) + sizeof(char *) * pObj->numOfColumns;
for (int col = 1; col < pObj->numOfColumns; col++) {
pBuffer->offset[col] = pBuffer->offset[col - 1] + pObj->schema[col - 1].bytes * totalRows;
}
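  // pBuffer is laid out column-major: offset[col] points to a region holding totalRows values of
  // column col, and spos/epos treat those per-column regions as one shared ring buffer of rows.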
// TODO: take pImport->pos = pObj->pointsPerBlock into consideration
  { // Do the merge work
SBlockIter cacheIter = {pImport->slot, pImport->pos, 0, 0}; // Iter to traverse old cache data
SBlockIter writeIter = {pImport->slot, pImport->pos, 0, 0}; // Iter to write data to cache
int availPoints = pObj->pointsPerBlock - pInfo->cacheBlocks[pInfo->currentSlot]->numOfPoints;
assert(availPoints >= 0);
while (1) {
if ((payloadIter >= rows) && isCacheIterEnd) break;
if ((pBuffer->epos + 1) % pBuffer->totalRows == pBuffer->spos) { // merge buffer is full, flush
vnodeFlushMergeBuffer(pBuffer, &writeIter, &cacheIter, pObj, pInfo, 1);
}
TSKEY payloadKey = (payloadIter < rows) ? KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) : INT64_MAX;
TSKEY cacheKey = (isCacheIterEnd) ? INT64_MAX : KEY_AT_INDEX(pInfo->cacheBlocks[cacheIter.slot]->offset[0], sizeof(TSKEY), cacheIter.pos);
      if (cacheKey < payloadKey) { // payload is exhausted, or the cached key is smaller: consume the cache row
for (int col = 0; col < pObj->numOfColumns; col++) {
memcpy(pBuffer->offset[col] + pObj->schema[col].bytes * pBuffer->epos,
pInfo->cacheBlocks[cacheIter.slot]->offset[col] + pObj->schema[col].bytes * cacheIter.pos,
pObj->schema[col].bytes);
}
FORWARD_ITER(cacheIter, 1, pInfo->maxBlocks, pObj->pointsPerBlock);
isCacheIterEnd = isCacheEnd(cacheIter, pObj);
      } else if (cacheKey > payloadKey) { // cache is exhausted, or the payload key is smaller: consume the payload row
if (availPoints == 0) { // Need to allocate a new cache block
pthread_mutex_lock(&(pPool->vmutex));
// TODO: Need to check if there are enough slots to hold a new one
SCacheBlock *pNewBlock = vnodeGetFreeCacheBlock(pVnode);
if (pNewBlock == NULL) { // Failed to allocate a new cache block, need to commit and loop over the remaining cache records
pthread_mutex_unlock(&(pPool->vmutex));
payloadIter = rows;
code = TSDB_CODE_ACTION_IN_PROGRESS;
pImport->commit = 1;
continue;
}
assert(pInfo->numOfBlocks <= pInfo->maxBlocks);
if (pInfo->numOfBlocks == pInfo->maxBlocks) {
vnodeFreeCacheBlock(pInfo->cacheBlocks[(pInfo->currentSlot + 1) % pInfo->maxBlocks]);
}
pNewBlock->pMeterObj = pObj;
pNewBlock->offset[0] = (char *)pNewBlock + sizeof(SCacheBlock) + sizeof(char *) * pObj->numOfColumns;
for (int col = 1; col < pObj->numOfColumns; col++)
pNewBlock->offset[col] = pNewBlock->offset[col - 1] + pObj->schema[col - 1].bytes * pObj->pointsPerBlock;
int newSlot = (writeIter.slot + 1) % pInfo->maxBlocks;
pInfo->blocks++;
int tblockId = pInfo->blocks;
if (writeIter.slot != pInfo->currentSlot) {
for (int tslot = pInfo->currentSlot; tslot != writeIter.slot;) {
int nextSlot = (tslot + 1) % pInfo->maxBlocks;
pInfo->cacheBlocks[nextSlot] = pInfo->cacheBlocks[tslot];
pInfo->cacheBlocks[nextSlot]->slot = nextSlot;
pInfo->cacheBlocks[nextSlot]->blockId = tblockId--;
tslot = (tslot - 1 + pInfo->maxBlocks) % pInfo->maxBlocks;
}
}
int index = pNewBlock->index;
if (cacheIter.slot == writeIter.slot) {
pNewBlock->numOfPoints = pInfo->cacheBlocks[cacheIter.slot]->numOfPoints;
int pointsLeft = pInfo->cacheBlocks[cacheIter.slot]->numOfPoints - cacheIter.pos;
if (pointsLeft > 0) {
for (int col = 0; col < pObj->numOfColumns; col++) {
memcpy((void *)(pNewBlock->offset[col] + pObj->schema[col].bytes*cacheIter.pos),
pInfo->cacheBlocks[cacheIter.slot]->offset[col] + pObj->schema[col].bytes * cacheIter.pos,
pObj->schema[col].bytes * pointsLeft);
}
}
}
pNewBlock->blockId = tblockId;
pNewBlock->slot = newSlot;
pNewBlock->index = index;
pInfo->cacheBlocks[newSlot] = pNewBlock;
pInfo->numOfBlocks++;
pInfo->unCommittedBlocks++;
pInfo->currentSlot = (pInfo->currentSlot + 1) % pInfo->maxBlocks;
pthread_mutex_unlock(&(pPool->vmutex));
cacheIter.slot = (cacheIter.slot + 1) % pInfo->maxBlocks;
          // the cache block being scanned was shifted forward by one slot above, so the iterator moves with it
availPoints = pObj->pointsPerBlock;
}
int offset = 0;
for (int col = 0; col < pObj->numOfColumns; col++) {
memcpy(pBuffer->offset[col] + pObj->schema[col].bytes * pBuffer->epos,
payload + pObj->bytesPerPoint * payloadIter + offset, pObj->schema[col].bytes);
offset += pObj->schema[col].bytes;
}
if (spayloadIter == payloadIter) {// update pVnode->firstKey
pthread_mutex_lock(&(pVnode->vmutex));
if (KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter) < pVnode->firstKey) pVnode->firstKey = firstKey;
pthread_mutex_unlock(&(pVnode->vmutex));
}
if (isCacheIterEnd) {
pObj->lastKey = KEY_AT_INDEX(payload, pObj->bytesPerPoint, payloadIter);
if (!isAppendData) isAppendData = 1;
}
rowsImported++;
availPoints--;
payloadIter++;
} else {
if (tsAffectedRowsMod) rowsImported++;
payloadIter++;
continue;
}
pBuffer->epos = (pBuffer->epos + 1) % pBuffer->totalRows;
}
if (pBuffer->spos != pBuffer->epos) { // Flush the remaining data in the merge buffer
vnodeFlushMergeBuffer(pBuffer, &writeIter, &cacheIter, pObj, pInfo, 0);
} else {
// Should never come here
assert(false);
}
if (isAppendData) {
pthread_mutex_lock(&(pVnode->vmutex));
if (pObj->lastKey > pVnode->lastKey) pVnode->lastKey = pObj->lastKey;
pthread_mutex_unlock(&(pVnode->vmutex));
}
}
pImport->importedRows += rowsImported;
atomic_fetch_sub_32(&(pObj->freePoints), rowsImported);
code = TSDB_CODE_SUCCESS;
_exit:
tfree(pBuffer);
return code;
}
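// vnodeImportDataToFiles splits the payload by data-file id (each file covers daysPerFile days)
// and merges each slice into the matching file through vnodeMergeDataIntoFile.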
int vnodeImportDataToFiles(SImportInfo *pImport, char *payload, const int rows) {
int code = 0;
// TODO : Check the correctness of pObj and pVnode
SMeterObj *pObj = (SMeterObj *)(pImport->pObj);
SVnodeObj *pVnode = vnodeList + pObj->vnode;
int64_t delta = pVnode->cfg.daysPerFile * tsMsPerDay[(uint8_t)pVnode->cfg.precision];
int sfid = KEY_AT_INDEX(payload, pObj->bytesPerPoint, 0) / delta;
int efid = KEY_AT_INDEX(payload, pObj->bytesPerPoint, rows - 1) / delta;
for (int fid = sfid; fid <= efid; fid++) {
TSKEY skey = fid * delta;
TSKEY ekey = skey + delta - 1;
int srow = 0, nrows = 0;
if (vnodeSearchKeyInRange(payload, pObj->bytesPerPoint, rows, skey, ekey, &srow, &nrows) < 0) continue;
assert(nrows > 0);
dTrace("vid:%d sid:%d meterId:%s, %d rows of data will be imported to file %d, srow:%d firstKey:%" PRId64 " lastKey:%" PRId64,
pObj->vnode, pObj->sid, pObj->meterId, nrows, fid, srow, KEY_AT_INDEX(payload, pObj->bytesPerPoint, srow),
KEY_AT_INDEX(payload, pObj->bytesPerPoint, (srow + nrows - 1)));
code = vnodeMergeDataIntoFile(pImport, payload + (srow * pObj->bytesPerPoint), nrows, fid);
if (code != TSDB_CODE_SUCCESS) break;
}
return code;
}
// TODO: add an offset in pShell so that the same messages are not processed repeatedly
int vnodeImportData(SMeterObj *pObj, SImportInfo *pImport) {
int code = 0;
int srow = 0, nrows = 0;
SVnodeObj * pVnode = vnodeList + pObj->vnode;
SCachePool *pPool = (SCachePool *)(pVnode->pCachePool);
// 1. import data in range (pObj->lastKeyOnFile, INT64_MAX) into cache
if (vnodeSearchKeyInRange(pImport->payload, pObj->bytesPerPoint, pImport->rows, pObj->lastKeyOnFile + 1, INT64_MAX,
&srow, &nrows) >= 0) {
assert(nrows > 0);
code = vnodeImportDataToCache(pImport, pImport->payload + pObj->bytesPerPoint * srow, nrows);
if (pImport->commit) { // Need to commit now
pPool->commitInProcess = 0;
vnodeProcessCommitTimer(pVnode, NULL);
return code;
}
if (code != TSDB_CODE_SUCCESS) return code;
}
// 2. import data (0, pObj->lastKeyOnFile) into files
if (vnodeSearchKeyInRange(pImport->payload, pObj->bytesPerPoint, pImport->rows, 0, pObj->lastKeyOnFile - 1, &srow,
&nrows) >= 0) {
assert(nrows > 0);
code = vnodeImportDataToFiles(pImport, pImport->payload + pObj->bytesPerPoint * srow, nrows);
}
pPool->commitInProcess = 0;
return code;
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "trpc.h"
#include "tschemautil.h"
#include "ttime.h"
#include "tutil.h"
#include "vnode.h"
#include "vnodeShell.h"
#include "vnodeUtil.h"
#include "vnodeStatus.h"
#define VALID_TIMESTAMP(key, curKey, prec) (((key) >= 0) && ((key) <= ((curKey) + 36500 * tsMsPerDay[prec])))
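// A key is valid if it is non-negative and at most 36500 days (roughly 100 years) ahead of the
// reference key curKey.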
int tsMeterSizeOnFile;
void vnodeUpdateMeter(void *param, void *tmdId);
void vnodeRecoverMeterObjectFile(int vnode);
int (*vnodeProcessAction[])(SMeterObj *, char *, int, char, void *, int, int *, TSKEY) = {vnodeInsertPoints,
vnodeImportPoints};
void vnodeFreeMeterObj(SMeterObj *pObj) {
if (pObj == NULL) return;
dTrace("vid:%d sid:%d id:%s, meter is cleaned up", pObj->vnode, pObj->sid, pObj->meterId);
vnodeFreeCacheInfo(pObj);
if (vnodeList[pObj->vnode].meterList != NULL) {
vnodeList[pObj->vnode].meterList[pObj->sid] = NULL;
}
memset(pObj->meterId, 0, tListLen(pObj->meterId));
tfree(pObj);
}
int vnodeUpdateVnodeStatistic(FILE *fp, SVnodeObj *pVnode) {
fseek(fp, TSDB_FILE_HEADER_VERSION_SIZE, SEEK_SET);
fwrite(&(pVnode->vnodeStatistic), sizeof(SVnodeStatisticInfo), 1, fp);
return 0;
}
void vnodeUpdateVnodeFileHeader(FILE *fp, SVnodeObj *pVnode) {
fseek(fp, TSDB_FILE_HEADER_LEN * 1 / 4, SEEK_SET);
#ifdef _TD_ARM_32_
fprintf(fp, "%lld %lld %lld ", pVnode->lastCreate, pVnode->lastRemove, pVnode->version);
fprintf(fp, "%lld %d %d ", pVnode->lastKeyOnFile, pVnode->fileId, pVnode->numOfFiles);
#else
fprintf(fp, "%ld %ld %ld ", pVnode->lastCreate, pVnode->lastRemove, pVnode->version);
fprintf(fp, "%ld %d %d ", pVnode->lastKeyOnFile, pVnode->fileId, pVnode->numOfFiles);
#endif
}
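// The meterObj file header appears to be split into quarters of TSDB_FILE_HEADER_LEN: statistics
// at the front, the vnode timestamps/version at 1/4, the SVnodeCfg at 2/4 and the peer
// descriptors at 3/4 (layout inferred from the fseek offsets used throughout this file).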
int vnodeCreateMeterObjFile(int vnode) {
FILE * fp;
char fileName[TSDB_FILENAME_LEN];
int32_t size;
// SMeterObj *pObj;
sprintf(fileName, "%s/vnode%d/meterObj.v%d", tsDirectory, vnode, vnode);
fp = fopen(fileName, "w+");
if (fp == NULL) {
dError("failed to create vnode:%d file:%s, errno:%d, reason:%s", vnode, fileName, errno, strerror(errno));
if (errno == EACCES) {
return TSDB_CODE_NO_DISK_PERMISSIONS;
} else if (errno == ENOSPC) {
return TSDB_CODE_SERV_NO_DISKSPACE;
} else {
return TSDB_CODE_VG_INIT_FAILED;
}
} else {
vnodeCreateFileHeader(fp);
vnodeUpdateVnodeFileHeader(fp, vnodeList + vnode);
fseek(fp, TSDB_FILE_HEADER_LEN, SEEK_SET);
size = sizeof(SMeterObjHeader) * vnodeList[vnode].cfg.maxSessions + sizeof(TSCKSUM);
tfree(vnodeList[vnode].meterIndex);
vnodeList[vnode].meterIndex = calloc(1, size);
taosCalcChecksumAppend(0, (uint8_t *)(vnodeList[vnode].meterIndex), size);
fwrite(vnodeList[vnode].meterIndex, size, 1, fp);
fclose(fp);
}
return TSDB_CODE_SUCCESS;
}
FILE *vnodeOpenMeterObjFile(int vnode) {
FILE * fp;
char fileName[TSDB_FILENAME_LEN];
struct stat fstat;
// check if directory exists
sprintf(fileName, "%s/vnode%d", tsDirectory, vnode);
if (stat(fileName, &fstat) < 0) return NULL;
sprintf(fileName, "%s/vnode%d/meterObj.v%d", tsDirectory, vnode, vnode);
if (stat(fileName, &fstat) < 0) return NULL;
fp = fopen(fileName, "r+");
if (fp != NULL) {
if (vnodeCheckFileIntegrity(fp) < 0) {
dError("file:%s is corrupted, need to restore it first, exit program", fileName);
fclose(fp);
// todo: how to recover
exit(1);
}
} else {
dError("failed to open %s, reason:%s", fileName, strerror(errno));
}
return fp;
}
int vnodeSaveMeterObjToFile(SMeterObj *pObj) {
int64_t offset, length, new_length, new_offset;
FILE * fp;
SVnodeObj *pVnode = &vnodeList[pObj->vnode];
char * buffer = NULL;
fp = vnodeOpenMeterObjFile(pObj->vnode);
if (fp == NULL) return -1;
buffer = (char *)malloc(tsMeterSizeOnFile);
if (buffer == NULL) {
dError("Failed to allocate memory while saving meter object to file, meterId", pObj->meterId);
fclose(fp);
return -1;
}
offset = pVnode->meterIndex[pObj->sid].offset;
length = pVnode->meterIndex[pObj->sid].length;
new_length = offsetof(SMeterObj, reserved) + pObj->numOfColumns * sizeof(SColumn) + pObj->sqlLen + sizeof(TSCKSUM);
memcpy(buffer, pObj, offsetof(SMeterObj, reserved));
memcpy(buffer + offsetof(SMeterObj, reserved), pObj->schema, pObj->numOfColumns * sizeof(SColumn));
memcpy(buffer + offsetof(SMeterObj, reserved) + pObj->numOfColumns * sizeof(SColumn), pObj->pSql, pObj->sqlLen);
taosCalcChecksumAppend(0, (uint8_t *)buffer, new_length);
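  // The serialized record is the SMeterObj fields up to `reserved`, then the column schema, then
  // the saved SQL text, followed by a checksum. Three placements are handled below: a new or
  // grown record is appended at the end of the file, a record whose slot was marked deleted
  // (negative offset) reuses that slot, and an existing record is overwritten in place.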
if (offset == 0 || length < new_length) { // New, append to file end
fseek(fp, 0, SEEK_END);
new_offset = ftell(fp);
fwrite(buffer, new_length, 1, fp);
pVnode->meterIndex[pObj->sid].offset = new_offset;
pVnode->meterIndex[pObj->sid].length = new_length;
  } else if (offset < 0) { // meter was marked deleted, reuse its previous slot in the file
fseek(fp, -offset, SEEK_SET);
fwrite(buffer, new_length, 1, fp);
pVnode->meterIndex[pObj->sid].offset = -offset;
pVnode->meterIndex[pObj->sid].length = new_length;
} else { // meter exists, overwrite it, offset > 0
fseek(fp, offset, SEEK_SET);
fwrite(buffer, new_length, 1, fp);
pVnode->meterIndex[pObj->sid].offset = (pObj->meterId[0] == 0) ? -offset : offset;
pVnode->meterIndex[pObj->sid].length = new_length;
}
// taosCalcChecksumAppend(0, pVnode->meterIndex, sizeof(SMeterObjHeader)*pVnode->cfg.maxSessions+sizeof(TSCKSUM));
// NOTE: no checksum, since it makes creating table slow
fseek(fp, TSDB_FILE_HEADER_LEN + sizeof(SMeterObjHeader) * pObj->sid, SEEK_SET);
fwrite(&(pVnode->meterIndex[pObj->sid]), sizeof(SMeterObjHeader), 1, fp);
// update checksum
// fseek(fp, TSDB_FILE_HEADER_LEN+sizeof(SMeterObjHeader)*(pVnode->cfg.maxSessions), SEEK_SET);
// fwrite(((char *)(pVnode->meterIndex) + sizeof(SMeterObjHeader)*(pVnode->cfg.maxSessions)), sizeof(TSCKSUM), 1, fp);
tfree(buffer);
vnodeUpdateVnodeStatistic(fp, pVnode);
vnodeUpdateVnodeFileHeader(fp, pVnode);
/* vnodeUpdateFileCheckSum(fp); */
fclose(fp);
return 0;
}
int vnodeSaveAllMeterObjToFile(int vnode) {
int64_t offset, length, new_length, new_offset;
FILE * fp;
SMeterObj *pObj;
SVnodeObj *pVnode = &vnodeList[vnode];
char * buffer = NULL;
fp = vnodeOpenMeterObjFile(vnode);
if (fp == NULL) return -1;
buffer = (char *)malloc(tsMeterSizeOnFile);
if (buffer == NULL) {
dError("Failed to allocate memory while saving all meter objects to file");
return -1;
}
for (int sid = 0; sid < pVnode->cfg.maxSessions; ++sid) {
pObj = pVnode->meterList[sid];
if (pObj == NULL) continue;
offset = pVnode->meterIndex[sid].offset;
length = pVnode->meterIndex[sid].length;
new_length = offsetof(SMeterObj, reserved) + pObj->numOfColumns * sizeof(SColumn) + pObj->sqlLen + sizeof(TSCKSUM);
memcpy(buffer, pObj, offsetof(SMeterObj, reserved));
memcpy(buffer + offsetof(SMeterObj, reserved), pObj->schema, pObj->numOfColumns * sizeof(SColumn));
memcpy(buffer + offsetof(SMeterObj, reserved) + pObj->numOfColumns * sizeof(SColumn), pObj->pSql, pObj->sqlLen);
taosCalcChecksumAppend(0, (uint8_t *)buffer, new_length);
    if (offset == 0 || length < new_length) { // new or grown record, append to file end
      fseek(fp, 0, SEEK_END);
      new_offset = ftell(fp);
      fwrite(buffer, new_length, 1, fp);
pVnode->meterIndex[sid].offset = new_offset;
pVnode->meterIndex[sid].length = new_length;
    } else if (offset < 0) { // meter was marked deleted, reuse its previous slot in the file
fseek(fp, -offset, SEEK_SET);
fwrite(buffer, new_length, 1, fp);
pVnode->meterIndex[sid].offset = -offset;
pVnode->meterIndex[sid].length = new_length;
} else { // meter exists, overwrite it, offset > 0
fseek(fp, offset, SEEK_SET);
fwrite(buffer, new_length, 1, fp);
pVnode->meterIndex[sid].offset = offset;
pVnode->meterIndex[sid].length = new_length;
}
}
// taosCalcChecksumAppend(0, pVnode->meterIndex, sizeof(SMeterObjHeader)*pVnode->cfg.maxSessions+sizeof(TSCKSUM));
fseek(fp, TSDB_FILE_HEADER_LEN, SEEK_SET);
fwrite(pVnode->meterIndex, sizeof(SMeterObjHeader) * pVnode->cfg.maxSessions + sizeof(TSCKSUM), 1, fp);
tfree(buffer);
vnodeUpdateVnodeStatistic(fp, pVnode);
vnodeUpdateVnodeFileHeader(fp, pVnode);
/* vnodeUpdateFileCheckSum(fp); */
fclose(fp);
return 0;
}
int vnodeSaveVnodeCfg(int vnode, SVnodeCfg *pCfg, SVPeerDesc *pDesc) {
FILE *fp;
fp = vnodeOpenMeterObjFile(vnode);
if (fp == NULL) {
dError("failed to open vnode:%d file", vnode);
return -1;
}
fseek(fp, TSDB_FILE_HEADER_LEN * 2 / 4, SEEK_SET);
fwrite(pCfg, sizeof(SVnodeCfg), 1, fp);
char temp[TSDB_FILE_HEADER_LEN / 4];
memset(temp, 0, sizeof(temp));
fseek(fp, TSDB_FILE_HEADER_LEN * 3 / 4, SEEK_SET);
fwrite(temp, sizeof(temp), 1, fp);
if (pCfg->replications >= 1) {
fseek(fp, TSDB_FILE_HEADER_LEN * 3 / 4, SEEK_SET);
fwrite(pDesc, sizeof(SVPeerDesc), pCfg->replications, fp);
}
/* vnodeUpdateFileCheckSum(fp); */
fclose(fp);
return TSDB_CODE_SUCCESS;
}
int vnodeSaveVnodeInfo(int vnode) {
FILE * fp;
SVnodeObj *pVnode = &vnodeList[vnode];
fp = vnodeOpenMeterObjFile(vnode);
if (fp == NULL) return -1;
vnodeUpdateVnodeFileHeader(fp, pVnode);
/* vnodeUpdateFileCheckSum(fp); */
fclose(fp);
return 0;
}
int vnodeRestoreMeterObj(char *buffer, int64_t length) {
SMeterObj *pSavedObj, *pObj;
int size;
pSavedObj = (SMeterObj *)buffer;
if (pSavedObj->vnode < 0 || pSavedObj->vnode >= TSDB_MAX_VNODES) {
dTrace("vid:%d is out of range, corrupted meter obj file", pSavedObj->vnode);
return -1;
}
SVnodeCfg *pCfg = &vnodeList[pSavedObj->vnode].cfg;
if (pSavedObj->sid < 0 || pSavedObj->sid >= pCfg->maxSessions) {
dTrace("vid:%d, sid:%d is larger than max:%d", pSavedObj->vnode, pSavedObj->sid, pCfg->maxSessions);
return -1;
}
if (pSavedObj->meterId[0] == 0) return TSDB_CODE_SUCCESS;
size = sizeof(SMeterObj) + pSavedObj->sqlLen + 1;
pObj = (SMeterObj *)malloc(size);
if (pObj == NULL) {
dError("vid:%d sid:%d, no memory to allocate", pSavedObj->vnode, pSavedObj->sid);
return TSDB_CODE_SERV_OUT_OF_MEMORY;
}
pObj->schema = (SColumn *)malloc(pSavedObj->numOfColumns * sizeof(SColumn));
if (NULL == pObj->schema){
dError("vid:%d sid:%d, no memory to allocate for schema", pSavedObj->vnode, pSavedObj->sid);
free(pObj);
return TSDB_CODE_SERV_OUT_OF_MEMORY;
}
memcpy(pObj, pSavedObj, offsetof(SMeterObj, reserved));
pObj->numOfQueries = 0;
pObj->pCache = vnodeAllocateCacheInfo(pObj);
if (NULL == pObj->pCache){
dError("vid:%d sid:%d, no memory to allocate for cache", pSavedObj->vnode, pSavedObj->sid);
tfree(pObj->schema);
tfree(pObj);
return TSDB_CODE_SERV_OUT_OF_MEMORY;
}
vnodeList[pSavedObj->vnode].meterList[pSavedObj->sid] = pObj;
pObj->pStream = NULL;
memcpy(pObj->schema, buffer + offsetof(SMeterObj, reserved), pSavedObj->numOfColumns * sizeof(SColumn));
pObj->state = TSDB_METER_STATE_READY;
if (pObj->sqlLen > 0)
memcpy((char *)pObj + sizeof(SMeterObj),
((char *)pSavedObj) + offsetof(SMeterObj, reserved) + sizeof(SColumn) * pSavedObj->numOfColumns,
pSavedObj->sqlLen);
pObj->pSql = (char *)pObj + sizeof(SMeterObj);
pObj->lastKey = pObj->lastKeyOnFile;
if (pObj->lastKey > vnodeList[pObj->vnode].lastKey) vnodeList[pObj->vnode].lastKey = pObj->lastKey;
// taosSetSecurityInfo(pObj->vnode, pObj->sid, pObj->meterId, pObj->spi, pObj->encrypt, pObj->secret, pObj->cipheringKey);
dTrace("vid:%d sid:%d id:%s, meter is restored, uid:%" PRIu64 "", pObj->vnode, pObj->sid, pObj->meterId, pObj->uid);
return TSDB_CODE_SUCCESS;
}
int vnodeOpenMetersVnode(int vnode) {
FILE * fp;
char * buffer;
int64_t sid;
int64_t offset, length;
SVnodeObj *pVnode = &vnodeList[vnode];
fp = vnodeOpenMeterObjFile(vnode);
if (fp == NULL) return 0;
fseek(fp, TSDB_FILE_HEADER_VERSION_SIZE, SEEK_SET);
fread(&(pVnode->vnodeStatistic), sizeof(SVnodeStatisticInfo), 1, fp);
fseek(fp, TSDB_FILE_HEADER_LEN * 1 / 4, SEEK_SET);
#ifdef _TD_ARM_32_
fscanf(fp, "%lld %lld %lld ", &(pVnode->lastCreate), &(pVnode->lastRemove), &(pVnode->version));
fscanf(fp, "%lld %d %d ", &(pVnode->lastKeyOnFile), &(pVnode->fileId), &(pVnode->numOfFiles));
#else
fscanf(fp, "%ld %ld %ld ", &(pVnode->lastCreate), &(pVnode->lastRemove), &(pVnode->version));
fscanf(fp, "%ld %d %d ", &(pVnode->lastKeyOnFile), &(pVnode->fileId), &(pVnode->numOfFiles));
#endif
fseek(fp, TSDB_FILE_HEADER_LEN * 2 / 4, SEEK_SET);
fread(&pVnode->cfg, sizeof(SVnodeCfg), 1, fp);
if (vnodeIsValidVnodeCfg(&pVnode->cfg) == false) {
dError("vid:%d, maxSessions:%d cacheBlockSize:%d replications:%d daysPerFile:%d daysToKeep:%d invalid, clear it",
vnode, pVnode->cfg.maxSessions, pVnode->cfg.cacheBlockSize, pVnode->cfg.replications,
pVnode->cfg.daysPerFile, pVnode->cfg.daysToKeep);
pVnode->cfg.maxSessions = 0; // error in vnode file
return 0;
}
fseek(fp, TSDB_FILE_HEADER_LEN * 3 / 4, SEEK_SET);
fread(&pVnode->vpeers, sizeof(SVPeerDesc), TSDB_VNODES_SUPPORT, fp);
fseek(fp, TSDB_FILE_HEADER_LEN, SEEK_SET);
tsMeterSizeOnFile = sizeof(SMeterObj) + TSDB_MAX_COLUMNS * sizeof(SColumn) + TSDB_MAX_SAVED_SQL_LEN + sizeof(TSCKSUM);
int size = sizeof(SMeterObj *) * pVnode->cfg.maxSessions;
pVnode->meterList = (void *)malloc(size);
if (pVnode->meterList == NULL) return -1;
memset(pVnode->meterList, 0, size);
size = sizeof(SMeterObjHeader) * pVnode->cfg.maxSessions + sizeof(TSCKSUM);
pVnode->meterIndex = (SMeterObjHeader *)calloc(1, size);
if (pVnode->meterIndex == NULL) {
tfree(pVnode->meterList);
return -1;
}
// Read SMeterObjHeader list from file
  if (fread(pVnode->meterIndex, size, 1, fp) < 1) return -1; // fread returns an item count, never a negative value
// if (!taosCheckChecksumWhole(pVnode->meterIndex, size)) {
// dError("vid: %d meter obj file header is broken since checksum mismatch", vnode);
// return -1;
// }
// Read the meter object from file and recover the structure
buffer = malloc(tsMeterSizeOnFile);
memset(buffer, 0, tsMeterSizeOnFile);
for (sid = 0; sid < pVnode->cfg.maxSessions; ++sid) {
offset = pVnode->meterIndex[sid].offset;
length = pVnode->meterIndex[sid].length;
if (offset <= 0 || length <= 0) continue;
fseek(fp, offset, SEEK_SET);
if (fread(buffer, length, 1, fp) <= 0) break;
if (taosCheckChecksumWhole((uint8_t *)buffer, length)) {
vnodeRestoreMeterObj(buffer, length - sizeof(TSCKSUM));
} else {
dError("meter object file is broken since checksum mismatch, vnode: %d sid: %d, try to recover", vnode, sid);
continue;
/* vnodeRecoverMeterObjectFile(vnode); */
}
}
tfree(buffer);
fclose(fp);
return 0;
}
void vnodeCloseMetersVnode(int vnode) {
SVnodeObj *pVnode = vnodeList + vnode;
SMeterObj *pObj;
if (pVnode->meterList) {
for (int sid = 0; sid < pVnode->cfg.maxSessions; ++sid) {
pObj = pVnode->meterList[sid];
if (pObj == NULL) continue;
vnodeFreeCacheInfo(pObj);
tfree(pObj->schema);
tfree(pObj);
}
tfree(pVnode->meterList);
}
pVnode->meterList = NULL;
}
int vnodeCreateMeterObj(SMeterObj *pNew, SConnSec *pSec) {
SMeterObj *pObj;
int code;
pObj = vnodeList[pNew->vnode].meterList[pNew->sid];
code = TSDB_CODE_SUCCESS;
if (pObj && pObj->uid == pNew->uid) {
if (pObj->sversion == pNew->sversion) {
dTrace("vid:%d sid:%d id:%s sversion:%d, identical meterObj, ignore create", pNew->vnode, pNew->sid,
pNew->meterId, pNew->sversion);
return -1;
}
dTrace("vid:%d sid:%d id:%s, update schema", pNew->vnode, pNew->sid, pNew->meterId);
if (!vnodeIsMeterState(pObj, TSDB_METER_STATE_UPDATING)) vnodeUpdateMeter(pNew, NULL);
return TSDB_CODE_SUCCESS;
}
if (pObj) {
dWarn("vid:%d sid:%d id:%s, old meter is there, remove it", pNew->vnode, pNew->sid, pNew->meterId);
vnodeRemoveMeterObj(pNew->vnode, pNew->sid);
}
pNew->pCache = vnodeAllocateCacheInfo(pNew);
if (pNew->pCache == NULL) {
code = TSDB_CODE_NO_RESOURCE;
} else {
vnodeList[pNew->vnode].meterList[pNew->sid] = pNew;
pNew->state = TSDB_METER_STATE_READY;
if (pNew->timeStamp > vnodeList[pNew->vnode].lastCreate) vnodeList[pNew->vnode].lastCreate = pNew->timeStamp;
vnodeSaveMeterObjToFile(pNew);
// vnodeCreateMeterMgmt(pNew, pSec);
vnodeCreateStream(pNew);
dTrace("vid:%d, sid:%d id:%s, meterObj is created, uid:%" PRIu64 "", pNew->vnode, pNew->sid, pNew->meterId, pNew->uid);
}
return code;
}
int vnodeRemoveMeterObj(int vnode, int sid) {
SMeterObj *pObj;
if (vnode < 0 || vnode >= TSDB_MAX_VNODES) {
dError("vid:%d is out of range", vnode);
return 0;
}
SVnodeCfg *pCfg = &vnodeList[vnode].cfg;
if (sid < 0 || sid >= pCfg->maxSessions) {
dError("vid:%d, sid:%d is larger than max:%d or less than 0", vnode, sid, pCfg->maxSessions);
return 0;
}
// vnode has been closed, no meters in this vnode
if (vnodeList[vnode].meterList == NULL) return 0;
pObj = vnodeList[vnode].meterList[sid];
if (pObj == NULL) {
return TSDB_CODE_SUCCESS;
}
if (!vnodeIsSafeToDeleteMeter(&vnodeList[vnode], sid)) {
return TSDB_CODE_ACTION_IN_PROGRESS;
}
// after remove this meter, change its state to DELETED
pObj->state = TSDB_METER_STATE_DROPPED;
pObj->timeStamp = taosGetTimestampMs();
vnodeList[vnode].lastRemove = pObj->timeStamp;
vnodeRemoveStream(pObj);
vnodeSaveMeterObjToFile(pObj);
vnodeFreeMeterObj(pObj);
return 0;
}
int vnodeInsertPoints(SMeterObj *pObj, char *cont, int contLen, char source, void *param, int sversion,
int *numOfInsertPoints, TSKEY now) {
int expectedLen, i;
short numOfPoints;
SSubmitMsg *pSubmit = (SSubmitMsg *)cont;
char * pData;
TSKEY tsKey;
int points = 0;
int code = TSDB_CODE_SUCCESS;
SVnodeObj * pVnode = vnodeList + pObj->vnode;
numOfPoints = htons(pSubmit->numOfRows);
expectedLen = numOfPoints * pObj->bytesPerPoint + sizeof(pSubmit->numOfRows);
if (expectedLen != contLen) {
dError("vid:%d sid:%d id:%s, invalid submit msg length:%d, expected:%d, bytesPerPoint: %d",
pObj->vnode, pObj->sid, pObj->meterId, contLen, expectedLen, pObj->bytesPerPoint);
code = TSDB_CODE_WRONG_MSG_SIZE;
goto _over;
}
// to guarantee time stamp is the same for all vnodes
pData = pSubmit->payLoad;
tsKey = now;
if (*((TSKEY *)pData) == 0) {
for (i = 0; i < numOfPoints; ++i) {
*((TSKEY *)pData) = tsKey++;
pData += pObj->bytesPerPoint;
}
}
if (numOfPoints >= (pVnode->cfg.blocksPerMeter - 2) * pObj->pointsPerBlock) {
code = TSDB_CODE_BATCH_SIZE_TOO_BIG;
dError("vid:%d sid:%d id:%s, batch size too big, insert points:%d, it shall be smaller than:%d", pObj->vnode, pObj->sid,
pObj->meterId, numOfPoints, (pVnode->cfg.blocksPerMeter - 2) * pObj->pointsPerBlock);
return code;
}
/*
* please refer to TBASE-926, data may be lost when the cache is full
*/
if (source == TSDB_DATA_SOURCE_SHELL && pVnode->cfg.replications > 1) {
code = vnodeForwardToPeer(pObj, cont, contLen, TSDB_ACTION_INSERT, sversion);
if (code != TSDB_CODE_SUCCESS) return code;
}
SCachePool *pPool = (SCachePool *)pVnode->pCachePool;
if (pObj->freePoints < numOfPoints || pObj->freePoints < (pObj->pointsPerBlock << 1) ||
pPool->notFreeSlots > pVnode->cfg.cacheNumOfBlocks.totalBlocks - 2) {
code = TSDB_CODE_ACTION_IN_PROGRESS;
dTrace("vid:%d sid:%d id:%s, cache is full, freePoints:%d, notFreeSlots:%d", pObj->vnode, pObj->sid, pObj->meterId,
pObj->freePoints, pPool->notFreeSlots);
vnodeProcessCommitTimer(pVnode, NULL);
return code;
}
  // FIXME: this should happen after the sversion comparison below.
if (pVnode->cfg.commitLog && source != TSDB_DATA_SOURCE_LOG) {
if (pVnode->logFd < 0) return TSDB_CODE_INVALID_COMMIT_LOG;
code = vnodeWriteToCommitLog(pObj, TSDB_ACTION_INSERT, cont, contLen, sversion);
if (code != TSDB_CODE_SUCCESS) return code;
}
if (pObj->sversion < sversion) {
dTrace("vid:%d sid:%d id:%s, schema is changed, new:%d old:%d", pObj->vnode, pObj->sid, pObj->meterId, sversion,
pObj->sversion);
vnodeSendMeterCfgMsg(pObj->vnode, pObj->sid);
code = TSDB_CODE_ACTION_IN_PROGRESS;
return code;
} else if (pObj->sversion > sversion) {
dTrace("vid:%d sid:%d id:%s, client schema out of date, sql is invalid. client sversion:%d vnode sversion:%d",
pObj->vnode, pObj->sid, pObj->meterId, pObj->sversion, sversion);
code = TSDB_CODE_INVALID_SQL;
return code;
}
pData = pSubmit->payLoad;
TSKEY firstKey = *((TSKEY *)pData);
TSKEY lastKey = *((TSKEY *)(pData + pObj->bytesPerPoint * (numOfPoints - 1)));
int cfid = now/pVnode->cfg.daysPerFile/tsMsPerDay[(uint8_t)pVnode->cfg.precision];
TSKEY minAllowedKey = (cfid - pVnode->maxFiles + 1)*pVnode->cfg.daysPerFile*tsMsPerDay[(uint8_t)pVnode->cfg.precision];
TSKEY maxAllowedKey = (cfid + 2)*pVnode->cfg.daysPerFile*tsMsPerDay[(uint8_t)pVnode->cfg.precision] - 2;
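  // Rows are accepted only inside the retention window: no older than the oldest kept data file
  // (maxFiles files back from the current file id) and no more than roughly two file periods
  // into the future.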
if (firstKey < minAllowedKey || firstKey > maxAllowedKey || lastKey < minAllowedKey || lastKey > maxAllowedKey) {
dError("vid:%d sid:%d id:%s, vnode lastKeyOnFile:%" PRId64 ", data is out of range, numOfPoints:%d firstKey:%" PRId64 " lastKey:%" PRId64 " minAllowedKey:%" PRId64 " maxAllowedKey:%" PRId64,
pObj->vnode, pObj->sid, pObj->meterId, pVnode->lastKeyOnFile, numOfPoints,firstKey, lastKey, minAllowedKey, maxAllowedKey);
return TSDB_CODE_TIMESTAMP_OUT_OF_RANGE;
}
if ((code = vnodeSetMeterInsertImportStateEx(pObj, TSDB_METER_STATE_INSERTING)) != TSDB_CODE_SUCCESS) {
goto _over;
}
for (i = 0; i < numOfPoints; ++i) { // meter will be dropped, abort current insertion
if (vnodeIsMeterState(pObj, TSDB_METER_STATE_DROPPING)) {
dWarn("vid:%d sid:%d id:%s, meter is dropped, abort insert, state:%d", pObj->vnode, pObj->sid, pObj->meterId,
pObj->state);
code = TSDB_CODE_NOT_ACTIVE_TABLE;
break;
}
if (*((TSKEY *)pData) <= pObj->lastKey) {
dWarn("vid:%d sid:%d id:%s, received key:%" PRId64 " not larger than lastKey:%" PRId64, pObj->vnode, pObj->sid, pObj->meterId,
*((TSKEY *)pData), pObj->lastKey);
pData += pObj->bytesPerPoint;
continue;
}
if (!VALID_TIMESTAMP(*((TSKEY *)pData), tsKey, (uint8_t)pVnode->cfg.precision)) {
code = TSDB_CODE_TIMESTAMP_OUT_OF_RANGE;
break;
}
if (vnodeInsertPointToCache(pObj, pData) < 0) {
code = TSDB_CODE_ACTION_IN_PROGRESS;
break;
}
pObj->lastKey = *((TSKEY *)pData);
pData += pObj->bytesPerPoint;
points++;
}
atomic_fetch_add_64(&(pVnode->vnodeStatistic.pointsWritten), points * (pObj->numOfColumns - 1));
atomic_fetch_add_64(&(pVnode->vnodeStatistic.totalStorage), points * pObj->bytesPerPoint);
pthread_mutex_lock(&(pVnode->vmutex));
if (pObj->lastKey > pVnode->lastKey) pVnode->lastKey = pObj->lastKey;
if (firstKey < pVnode->firstKey) pVnode->firstKey = firstKey;
assert(pVnode->firstKey > 0);
pVnode->version++;
pthread_mutex_unlock(&(pVnode->vmutex));
vnodeClearMeterState(pObj, TSDB_METER_STATE_INSERTING);
_over:
dTrace("vid:%d sid:%d id:%s, %d out of %d points are inserted, lastKey:%" PRId64 " source:%d, vnode total storage: %" PRId64 "",
pObj->vnode, pObj->sid, pObj->meterId, points, numOfPoints, pObj->lastKey, source,
pVnode->vnodeStatistic.totalStorage);
*numOfInsertPoints = points;
return code;
}
/**
 * If this function keeps running after the vnode has been freed, it is very likely to crash.
 * TODO: fix it by setting a flag that disables commit in such cases.
*
* @param param
* @param tmrId
*/
void vnodeProcessUpdateSchemaTimer(void *param, void *tmrId) {
SMeterObj * pObj = (SMeterObj *)param;
SVnodeObj * pVnode = vnodeList + pObj->vnode;
/*
* vnode may have been dropped, check it in the first place
   * if the vnode is freed, pObj is no longer valid and pObj->vnode is meaningless,
   * so maybe the vid should be passed into this function as a parameter?
*/
if (pVnode->meterList == NULL) {
dTrace("vnode is deleted, abort update schema");
return;
}
SCachePool *pPool = (SCachePool *)pVnode->pCachePool;
pthread_mutex_lock(&pPool->vmutex);
if (pPool->commitInProcess) {
dTrace("vid:%d sid:%d mid:%s, committing in process, commit later", pObj->vnode, pObj->sid, pObj->meterId);
if (taosTmrStart(vnodeProcessUpdateSchemaTimer, 10, pObj, vnodeTmrCtrl) == NULL) {
vnodeClearMeterState(pObj, TSDB_METER_STATE_UPDATING);
}
pthread_mutex_unlock(&pPool->vmutex);
return;
}
pPool->commitInProcess = 1;
pthread_mutex_unlock(&pPool->vmutex);
vnodeCommitMultiToFile(pVnode, pObj->sid, pObj->sid);
}
void vnodeUpdateMeter(void *param, void *tmrId) {
SMeterObj *pNew = (SMeterObj *)param;
if (pNew == NULL || pNew->vnode < 0 || pNew->sid < 0) return;
SVnodeObj* pVnode = &vnodeList[pNew->vnode];
if (pVnode->meterList == NULL) {
dTrace("vid:%d sid:%d id:%s, vnode is deleted, status:%s, abort update schema",
pNew->vnode, pNew->sid, pNew->meterId, taosGetVnodeStatusStr(vnodeList[pNew->vnode].vnodeStatus));
free(pNew->schema);
free(pNew);
return;
}
SMeterObj *pObj = pVnode->meterList[pNew->sid];
if (pObj == NULL || vnodeIsMeterState(pObj, TSDB_METER_STATE_DROPPING)) {
dTrace("vid:%d sid:%d id:%s, meter is deleted, abort update schema", pNew->vnode, pNew->sid, pNew->meterId);
free(pNew->schema);
free(pNew);
return;
}
int32_t state = vnodeSetMeterState(pObj, TSDB_METER_STATE_UPDATING);
if (state >= TSDB_METER_STATE_DROPPING) {
dError("vid:%d sid:%d id:%s, meter is deleted, failed to update, state:%d",
pObj->vnode, pObj->sid, pObj->meterId, state);
return;
}
int32_t num = 0;
pthread_mutex_lock(&pVnode->vmutex);
num = pObj->numOfQueries;
pthread_mutex_unlock(&pVnode->vmutex);
if (num > 0 || state != TSDB_METER_STATE_READY) {
// the state may have been changed by vnodeSetMeterState, recover it in the first place
vnodeClearMeterState(pObj, TSDB_METER_STATE_UPDATING);
dTrace("vid:%d sid:%d id:%s, update failed, retry later, numOfQueries:%d, state:%d",
pNew->vnode, pNew->sid, pNew->meterId, num, state);
// retry update meter in 50ms
if (taosTmrStart(vnodeUpdateMeter, 50, pNew, vnodeTmrCtrl) == NULL) {
dError("vid:%d sid:%d id:%s, failed to start update timer, no retry", pNew->vnode, pNew->sid, pNew->meterId);
free(pNew->schema);
free(pNew);
}
return;
}
// commit first
if (!vnodeIsCacheCommitted(pObj)) {
// commit data first
if (taosTmrStart(vnodeProcessUpdateSchemaTimer, 0, pObj, vnodeTmrCtrl) == NULL) {
dError("vid:%d sid:%d id:%s, failed to start commit timer", pObj->vnode, pObj->sid, pObj->meterId);
vnodeClearMeterState(pObj, TSDB_METER_STATE_UPDATING);
free(pNew->schema);
free(pNew);
return;
}
if (taosTmrStart(vnodeUpdateMeter, 50, pNew, vnodeTmrCtrl) == NULL) {
dError("vid:%d sid:%d id:%s, failed to start update timer", pNew->vnode, pNew->sid, pNew->meterId);
vnodeClearMeterState(pObj, TSDB_METER_STATE_UPDATING);
free(pNew->schema);
free(pNew);
}
dTrace("vid:%d sid:%d meterId:%s, there are data in cache, commit first, update later",
pNew->vnode, pNew->sid, pNew->meterId);
vnodeClearMeterState(pObj, TSDB_METER_STATE_UPDATING);
return;
}
strcpy(pObj->meterId, pNew->meterId);
pObj->numOfColumns = pNew->numOfColumns;
pObj->timeStamp = pNew->timeStamp;
pObj->bytesPerPoint = pNew->bytesPerPoint;
pObj->maxBytes = pNew->maxBytes;
if (pObj->timeStamp > vnodeList[pObj->vnode].lastCreate) vnodeList[pObj->vnode].lastCreate = pObj->timeStamp;
tfree(pObj->schema);
pObj->schema = pNew->schema;
vnodeFreeCacheInfo(pObj);
pObj->pCache = vnodeAllocateCacheInfo(pObj);
pObj->sversion = pNew->sversion;
vnodeSaveMeterObjToFile(pObj);
vnodeClearMeterState(pObj, TSDB_METER_STATE_UPDATING);
dTrace("vid:%d sid:%d id:%s, schema is updated, state:%d", pObj->vnode, pObj->sid, pObj->meterId, pObj->state);
free(pNew);
}
void vnodeRecoverMeterObjectFile(int vnode) {
// TODO: start the recovery process
assert(0);
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "qextbuffer.h"
#include "taosmsg.h"
#include "tscJoinProcess.h"
#include "ttime.h"
#include "vnode.h"
#include "vnodeRead.h"
#include "vnodeUtil.h"
#include "vnodeQueryImpl.h"
#define ALL_CACHE_BLOCKS_CHECKED(q) \
(((q)->slot == (q)->currentSlot && QUERY_IS_ASC_QUERY(q)) || \
((q)->slot == (q)->firstSlot && (!QUERY_IS_ASC_QUERY(q))))
#define FORWARD_CACHE_BLOCK_CHECK_SLOT(slot, step, maxblocks) (slot) = ((slot) + (step) + (maxblocks)) % (maxblocks);
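// ALL_CACHE_BLOCKS_CHECKED becomes true once the scan reaches the newest slot for an ascending
// query (or the oldest slot for a descending one); FORWARD_CACHE_BLOCK_CHECK_SLOT steps the slot
// index circularly in the query direction.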
static bool isGroupbyEachTable(SSqlGroupbyExpr *pGroupbyExpr, tSidSet *pSidset) {
if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) {
return false;
}
for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) {
SColIndexEx *pColIndex = &pGroupbyExpr->columnInfo[i];
if (pColIndex->flag == TSDB_COL_TAG) {
assert(pSidset->numOfSids == pSidset->numOfSubSet);
return true;
}
}
return false;
}
static bool doCheckWithPrevQueryRange(SQuery *pQuery, TSKEY nextKey) {
if ((nextKey > pQuery->ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
(nextKey < pQuery->ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
return false;
}
return true;
}
/**
 * The start position of the first cache block to check is located before the loop starts.
 * The start position of each subsequent cache block is decided just before that block is checked.
*/
static void setStartPositionForCacheBlock(SQuery *pQuery, SCacheBlock *pBlock, bool *firstCheckSlot) {
if (!(*firstCheckSlot)) {
if (QUERY_IS_ASC_QUERY(pQuery)) {
pQuery->pos = 0;
} else {
pQuery->pos = pBlock->numOfPoints - 1;
}
} else {
(*firstCheckSlot) = false;
}
}
static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) {
SQuery *pQuery = pRuntimeEnv->pQuery;
for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) {
SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[i]);
if (pResInfo != NULL) {
pResInfo->complete = false;
}
}
}
static void queryOnMultiDataCache(SQInfo *pQInfo, SMeterDataInfo *pMeterDataInfo) {
SQuery * pQuery = &pQInfo->query;
STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter;
SQueryRuntimeEnv * pRuntimeEnv = &pQInfo->pTableQuerySupporter->runtimeEnv;
SMeterSidExtInfo **pMeterSidExtInfo = pSupporter->pMeterSidExtInfo;
SMeterObj *pTempMeterObj = getMeterObj(pSupporter->pMetersHashTable, pMeterSidExtInfo[0]->sid);
assert(pTempMeterObj != NULL);
__block_search_fn_t searchFn = vnodeSearchKeyFunc[pTempMeterObj->searchAlgorithm];
int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
dTrace("QInfo:%p start to query data in cache", pQInfo);
int64_t st = taosGetTimestampUs();
int32_t totalBlocks = 0;
for (int32_t groupIdx = 0; groupIdx < pSupporter->pSidSet->numOfSubSet; ++groupIdx) {
int32_t start = pSupporter->pSidSet->starterPos[groupIdx];
int32_t end = pSupporter->pSidSet->starterPos[groupIdx + 1] - 1;
if (isQueryKilled(pQInfo)) {
return;
}
for (int32_t k = start; k <= end; ++k) {
SMeterObj *pMeterObj = getMeterObj(pSupporter->pMetersHashTable, pMeterSidExtInfo[k]->sid);
if (pMeterObj == NULL) {
dError("QInfo:%p failed to find meterId:%d, continue", pQInfo, pMeterSidExtInfo[k]->sid);
continue;
}
pQInfo->pObj = pMeterObj;
pRuntimeEnv->pMeterObj = pMeterObj;
if (pMeterDataInfo[k].pMeterQInfo == NULL) {
pMeterDataInfo[k].pMeterQInfo =
createMeterQueryInfo(pSupporter, pMeterObj->sid, pSupporter->rawSKey, pSupporter->rawEKey);
}
if (pMeterDataInfo[k].pMeterObj == NULL) { // no data in disk for this meter, set its pointer
setMeterDataInfo(&pMeterDataInfo[k], pMeterObj, k, groupIdx);
}
assert(pMeterDataInfo[k].meterOrderIdx == k && pMeterObj == pMeterDataInfo[k].pMeterObj);
SMeterQueryInfo *pMeterQueryInfo = pMeterDataInfo[k].pMeterQInfo;
restoreIntervalQueryRange(pRuntimeEnv, pMeterQueryInfo);
/*
* Update the query meter column index and the corresponding filter column index
* the original column index info may be inconsistent with current meter in cache.
*
* The stable schema has been changed, but the meter schema, along with the data in cache,
* will not be updated until data with new schema arrive.
*/
vnodeUpdateQueryColumnIndex(pQuery, pMeterObj);
vnodeUpdateFilterColumnIndex(pQuery);
if ((pQuery->lastKey > pQuery->ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
(pQuery->lastKey < pQuery->ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
dTrace("QInfo:%p vid:%d sid:%d id:%s, query completed, ignore data in cache. qrange:%" PRId64 "-%" PRId64
", lastKey:%" PRId64,
pQInfo, pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->skey, pQuery->ekey,
pQuery->lastKey);
continue;
}
qTrace("QInfo:%p vid:%d sid:%d id:%s, query in cache, qrange:%" PRId64 "-%" PRId64 ", lastKey:%" PRId64, pQInfo,
pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->skey, pQuery->ekey, pQuery->lastKey);
/*
       * find the appropriate start position in cache
* NOTE: (taking ascending order query for example)
* for the specific query range [pQuery->lastKey, pQuery->ekey], there may be no qualified result in cache.
* Therefore, we need the first point that is greater(less) than the pQuery->lastKey, so the boundary check
* should be ignored (the fourth parameter).
*/
TSKEY nextKey = getQueryStartPositionInCache(pRuntimeEnv, &pQuery->slot, &pQuery->pos, true);
if (nextKey < 0 || !doCheckWithPrevQueryRange(pQuery, nextKey)) {
qTrace("QInfo:%p vid:%d sid:%d id:%s, no data qualified in cache, cache blocks:%d, lastKey:%" PRId64, pQInfo,
pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->numOfBlocks, pQuery->lastKey);
continue;
}
// data in this block may be flushed to disk and this block is allocated to other meter
// todo try with remain cache blocks
SCacheBlock *pBlock = getCacheDataBlock(pMeterObj, pRuntimeEnv, pQuery->slot);
if (pBlock == NULL) {
continue;
}
bool firstCheckSlot = true;
SCacheInfo *pCacheInfo = (SCacheInfo *)pMeterObj->pCache;
for (int32_t i = 0; i < pCacheInfo->maxBlocks; ++i) {
pBlock = getCacheDataBlock(pMeterObj, pRuntimeEnv, pQuery->slot);
/*
* 1. pBlock == NULL: the cache block may have been flushed to disk and is no longer available; skip it and try the next one.
*    The check for an empty block has been refactored into the getCacheDataBlock function.
*/
if (pBlock == NULL) {
if (ALL_CACHE_BLOCKS_CHECKED(pQuery)) {
break;
}
FORWARD_CACHE_BLOCK_CHECK_SLOT(pQuery->slot, step, pCacheInfo->maxBlocks);
continue;
}
setStartPositionForCacheBlock(pQuery, pBlock, &firstCheckSlot);
TSKEY *primaryKeys = (TSKEY *)pRuntimeEnv->primaryColBuffer->data;
TSKEY key = primaryKeys[pQuery->pos];
// in handling file data block, the timestamp range validation is done during fetching candidate file blocks
if ((key > pSupporter->rawEKey && QUERY_IS_ASC_QUERY(pQuery)) ||
(key < pSupporter->rawEKey && !QUERY_IS_ASC_QUERY(pQuery))) {
break;
}
if (pQuery->intervalTime == 0) {
setExecutionContext(pSupporter, pMeterQueryInfo, k, pMeterDataInfo[k].groupIdx, key);
} else {
setIntervalQueryRange(pMeterQueryInfo, pSupporter, key);
int32_t ret = setAdditionalInfo(pSupporter, k, pMeterQueryInfo);
if (ret != TSDB_CODE_SUCCESS) {
pQInfo->killed = 1;
return;
}
}
qTrace("QInfo:%p vid:%d sid:%d id:%s, query in cache, qrange:%" PRId64 "-%" PRId64 ", lastKey:%" PRId64, pQInfo,
pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->skey, pQuery->ekey, pQuery->lastKey);
// only record the key on last block
SET_CACHE_BLOCK_FLAG(pRuntimeEnv->blockStatus);
SBlockInfo binfo = getBlockInfo(pRuntimeEnv);
dTrace("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", fileId:%d, slot:%d, pos:%d, bstatus:%d",
GET_QINFO_ADDR(pQuery), binfo.keyFirst, binfo.keyLast, pQuery->fileId, pQuery->slot, pQuery->pos,
pRuntimeEnv->blockStatus);
totalBlocks++;
stableApplyFunctionsOnBlock(pSupporter, &pMeterDataInfo[k], &binfo, NULL, searchFn);
if (ALL_CACHE_BLOCKS_CHECKED(pQuery)) {
break;
}
FORWARD_CACHE_BLOCK_CHECK_SLOT(pQuery->slot, step, pCacheInfo->maxBlocks);
}
}
}
int64_t time = taosGetTimestampUs() - st;
SQueryCostSummary *pSummary = &pRuntimeEnv->summary;
pSummary->blocksInCache += totalBlocks;
pSummary->cacheTimeUs += time;
pSummary->numOfTables = pSupporter->pSidSet->numOfSids;
dTrace("QInfo:%p complete check %d cache blocks, elapsed time:%.3fms", pQInfo, totalBlocks, time / 1000.0);
setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
}
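/*
 * Scan the data files for a super table query.
 *
 * Outline (as implemented below): iterate over the vnode header files in query order; for each file,
 * filter the meters that actually have data in that file (vnodeFilterQualifiedMeters), collect their
 * candidate compressed blocks (getDataBlocksForMeters), flatten them into pDataBlockInfoEx
 * (createDataBlocksInfoEx), then scan the blocks sequentially, loading each block on demand
 * (LoadDatablockOnDemand) before applying the query functions (stableApplyFunctionsOnBlock).
 */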
static void queryOnMultiDataFiles(SQInfo *pQInfo, SMeterDataInfo *pMeterDataInfo) {
SQuery * pQuery = &pQInfo->query;
STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter;
SQueryRuntimeEnv * pRuntimeEnv = &pSupporter->runtimeEnv;
SMeterDataBlockInfoEx *pDataBlockInfoEx = NULL;
int32_t nAllocBlocksInfoSize = 0;
SMeterObj * pTempMeter = getMeterObj(pSupporter->pMetersHashTable, pSupporter->pMeterSidExtInfo[0]->sid);
__block_search_fn_t searchFn = vnodeSearchKeyFunc[pTempMeter->searchAlgorithm];
int32_t vnodeId = pTempMeter->vnode;
SQueryFilesInfo *pVnodeFileInfo = &pRuntimeEnv->vnodeFileInfo;
dTrace("QInfo:%p start to check data blocks in %d files", pQInfo, pVnodeFileInfo->numOfFiles);
int32_t fid = QUERY_IS_ASC_QUERY(pQuery) ? -1 : INT32_MAX;
int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);
SQueryCostSummary *pSummary = &pRuntimeEnv->summary;
int64_t totalBlocks = 0;
int64_t st = taosGetTimestampUs();
while (1) {
if (isQueryKilled(pQInfo)) {
break;
}
int32_t fileIdx = vnodeGetVnodeHeaderFileIndex(&fid, pRuntimeEnv, pQuery->order.order);
if (fileIdx < 0) { // no valid file, abort current search
break;
}
pRuntimeEnv->startPos.fileId = fid;
pQuery->fileId = fid;
pSummary->numOfFiles++;
if (vnodeGetHeaderFile(pRuntimeEnv, fileIdx) != TSDB_CODE_SUCCESS) {
fid += step;
continue;
}
int32_t numOfQualifiedMeters = 0;
assert(fileIdx == pRuntimeEnv->vnodeFileInfo.current);
SMeterDataInfo **pReqMeterDataInfo = NULL;
int32_t ret = vnodeFilterQualifiedMeters(pQInfo, vnodeId, pSupporter->pSidSet, pMeterDataInfo,
&numOfQualifiedMeters, &pReqMeterDataInfo);
if (ret != TSDB_CODE_SUCCESS) {
dError("QInfo:%p failed to create meterdata struct to perform query processing, abort", pQInfo);
tfree(pReqMeterDataInfo);
pQInfo->code = -ret;
pQInfo->killed = 1;
return;
}
dTrace("QInfo:%p file:%s, %d meters qualified", pQInfo, pVnodeFileInfo->dataFilePath, numOfQualifiedMeters);
// none of the meters in the query set has pHeaderFileData in this file, try the next file
if (numOfQualifiedMeters == 0) {
fid += step;
tfree(pReqMeterDataInfo);
continue;
}
uint32_t numOfBlocks = 0;
ret = getDataBlocksForMeters(pSupporter, pQuery, numOfQualifiedMeters, pVnodeFileInfo->headerFilePath,
pReqMeterDataInfo, &numOfBlocks);
if (ret != TSDB_CODE_SUCCESS) {
dError("QInfo:%p failed to get data block before scan data blocks, abort", pQInfo);
tfree(pReqMeterDataInfo);
pQInfo->code = -ret;
pQInfo->killed = 1;
return;
}
dTrace("QInfo:%p file:%s, %d meters contains %d blocks to be checked", pQInfo, pVnodeFileInfo->dataFilePath,
numOfQualifiedMeters, numOfBlocks);
if (numOfBlocks == 0) {
fid += step;
tfree(pReqMeterDataInfo);
continue;
}
ret = createDataBlocksInfoEx(pReqMeterDataInfo, numOfQualifiedMeters, &pDataBlockInfoEx, numOfBlocks,
&nAllocBlocksInfoSize, (int64_t)pQInfo);
if (ret != TSDB_CODE_SUCCESS) { // failed to create data blocks
dError("QInfo:%p build blockInfoEx failed, abort", pQInfo);
tfree(pReqMeterDataInfo);
pQInfo->code = -ret;
pQInfo->killed = 1;
return;
}
dTrace("QInfo:%p start to load %d blocks and check", pQInfo, numOfBlocks);
int64_t TRACE_OUTPUT_BLOCK_CNT = 10000;
int64_t stimeUnit = 0;
int64_t etimeUnit = 0;
totalBlocks += numOfBlocks;
// sequentially scan the qualified data blocks of this file
int32_t j = QUERY_IS_ASC_QUERY(pQuery) ? 0 : numOfBlocks - 1;
for (; j < numOfBlocks && j >= 0; j += step) {
if (isQueryKilled(pQInfo)) {
break;
}
/* output elapsed time for log every TRACE_OUTPUT_BLOCK_CNT blocks */
if (j == 0) {
stimeUnit = taosGetTimestampMs();
} else if ((j % TRACE_OUTPUT_BLOCK_CNT) == 0) {
etimeUnit = taosGetTimestampMs();
dTrace("QInfo:%p load and check %" PRId64 " blocks, and continue. elapsed:%" PRId64 " ms", pQInfo,
TRACE_OUTPUT_BLOCK_CNT, etimeUnit - stimeUnit);
stimeUnit = taosGetTimestampMs();
}
SMeterDataBlockInfoEx *pInfoEx = &pDataBlockInfoEx[j];
SMeterDataInfo * pOneMeterDataInfo = pInfoEx->pMeterDataInfo;
SMeterQueryInfo * pMeterQueryInfo = pOneMeterDataInfo->pMeterQInfo;
SMeterObj * pMeterObj = pOneMeterDataInfo->pMeterObj;
pQInfo->pObj = pMeterObj;
pRuntimeEnv->pMeterObj = pMeterObj;
restoreIntervalQueryRange(pRuntimeEnv, pMeterQueryInfo);
if ((pQuery->lastKey > pQuery->ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
(pQuery->lastKey < pQuery->ekey && !QUERY_IS_ASC_QUERY(pQuery))) {
qTrace("QInfo:%p vid:%d sid:%d id:%s, query completed, no need to scan this data block. qrange:%" PRId64
"-%" PRId64 ", lastKey:%" PRId64,
pQInfo, pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->skey, pQuery->ekey,
pQuery->lastKey);
continue;
}
SCompBlock *pBlock = pInfoEx->pBlock.compBlock;
bool ondemandLoad = onDemandLoadDatablock(pQuery, pMeterQueryInfo->queryRangeSet);
ret = LoadDatablockOnDemand(pBlock, &pInfoEx->pBlock.fields, &pRuntimeEnv->blockStatus, pRuntimeEnv, fileIdx,
pInfoEx->blockIndex, searchFn, ondemandLoad);
if (ret != DISK_DATA_LOADED) {
pSummary->skippedFileBlocks++;
continue;
}
SBlockInfo binfo = getBlockBasicInfo(pRuntimeEnv, pBlock, BLK_FILE_BLOCK);
int64_t nextKey = -1;
assert(pQuery->pos >= 0 && pQuery->pos < pBlock->numOfPoints);
TSKEY *primaryKeys = (TSKEY *)pRuntimeEnv->primaryColBuffer->data;
if (IS_DATA_BLOCK_LOADED(pRuntimeEnv->blockStatus) && needPrimaryTimestampCol(pQuery, &binfo)) {
nextKey = primaryKeys[pQuery->pos];
if (!doCheckWithPrevQueryRange(pQuery, nextKey)) {
qTrace("QInfo:%p vid:%d sid:%d id:%s, no data qualified in data file, lastKey:%" PRId64, pQInfo,
pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->numOfBlocks, pQuery->lastKey);
continue;
}
} else {
// if the data block is not loaded, it must be an intermediate block
assert((pBlock->keyFirst >= pQuery->lastKey && pBlock->keyLast <= pQuery->ekey && QUERY_IS_ASC_QUERY(pQuery)) ||
(pBlock->keyFirst >= pQuery->ekey && pBlock->keyLast <= pQuery->lastKey && !QUERY_IS_ASC_QUERY(pQuery)));
nextKey = QUERY_IS_ASC_QUERY(pQuery) ? pBlock->keyFirst : pBlock->keyLast;
}
if (pQuery->intervalTime == 0) {
setExecutionContext(pSupporter, pMeterQueryInfo, pOneMeterDataInfo->meterOrderIdx, pOneMeterDataInfo->groupIdx,
nextKey);
} else { // interval query
setIntervalQueryRange(pMeterQueryInfo, pSupporter, nextKey);
ret = setAdditionalInfo(pSupporter, pOneMeterDataInfo->meterOrderIdx, pMeterQueryInfo);
if (ret != TSDB_CODE_SUCCESS) {
tfree(pReqMeterDataInfo); // error code has been set
pQInfo->killed = 1;
return;
}
}
stableApplyFunctionsOnBlock(pSupporter, pOneMeterDataInfo, &binfo, pInfoEx->pBlock.fields, searchFn);
}
tfree(pReqMeterDataInfo);
// try next file
fid += step;
}
int64_t time = taosGetTimestampUs() - st;
dTrace("QInfo:%p complete check %d files, %d blocks, elapsed time:%.3fms", pQInfo, pVnodeFileInfo->numOfFiles,
totalBlocks, time / 1000.0);
pSummary->fileTimeUs += time;
pSummary->readDiskBlocks += totalBlocks;
pSummary->numOfTables = pSupporter->pSidSet->numOfSids;
setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
freeMeterBlockInfoEx(pDataBlockInfoEx, nAllocBlocksInfoSize);
}
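/*
 * Prepare the runtime environment for querying one meter of a super table: bind the meter object,
 * set the tag values for the output columns, update the column/filter index mapping, check whether
 * any data exists in cache or on disk, and position the ts-comp buffer cursor when a ts buffer is
 * attached. Returns false when this meter can be skipped.
 */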
static bool multimeterMultioutputHelper(SQInfo *pQInfo, bool *dataInDisk, bool *dataInCache, int32_t index,
int32_t start) {
STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter;
SMeterSidExtInfo **pMeterSidExtInfo = pSupporter->pMeterSidExtInfo;
SQueryRuntimeEnv * pRuntimeEnv = &pSupporter->runtimeEnv;
SQuery * pQuery = &pQInfo->query;
setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
SMeterObj *pMeterObj = getMeterObj(pSupporter->pMetersHashTable, pMeterSidExtInfo[index]->sid);
if (pMeterObj == NULL) {
dError("QInfo:%p do not find required meter id: %d, all meterObjs id is:", pQInfo, pMeterSidExtInfo[index]->sid);
return false;
}
vnodeSetTagValueInParam(pSupporter->pSidSet, pRuntimeEnv, pMeterSidExtInfo[index]);
dTrace("QInfo:%p query on (%d): vid:%d sid:%d meterId:%s, qrange:%" PRId64 "-%" PRId64, pQInfo, index - start,
pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->skey, pQuery->ekey);
pQInfo->pObj = pMeterObj;
pQuery->lastKey = pQuery->skey;
pRuntimeEnv->pMeterObj = pMeterObj;
vnodeUpdateQueryColumnIndex(pQuery, pRuntimeEnv->pMeterObj);
vnodeUpdateFilterColumnIndex(pQuery);
vnodeCheckIfDataExists(pRuntimeEnv, pMeterObj, dataInDisk, dataInCache);
// no data in file or cache is qualified for this query, abort
if (!(dataInCache || dataInDisk)) {
dTrace("QInfo:%p vid:%d sid:%d meterId:%s, qrange:%" PRId64 "-%" PRId64 ", nores, %p", pQInfo, pMeterObj->vnode,
pMeterObj->sid, pMeterObj->meterId, pQuery->skey, pQuery->ekey, pQuery);
return false;
}
if (pRuntimeEnv->pTSBuf != NULL) {
if (pRuntimeEnv->cur.vnodeIndex == -1) {
int64_t tag = pRuntimeEnv->pCtx[0].tag.i64Key;
STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, tag);
// failed to find data with the specified tag value
if (elem.vnode < 0) {
return false;
}
} else {
tsBufSetCursor(pRuntimeEnv->pTSBuf, &pRuntimeEnv->cur);
}
}
initCtxOutputBuf(pRuntimeEnv);
return true;
}
static int64_t doCheckMetersInGroup(SQInfo *pQInfo, int32_t index, int32_t start) {
SQuery * pQuery = &pQInfo->query;
STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter;
SQueryRuntimeEnv * pRuntimeEnv = &pSupporter->runtimeEnv;
bool dataInDisk = true;
bool dataInCache = true;
if (!multimeterMultioutputHelper(pQInfo, &dataInDisk, &dataInCache, index, start)) {
return 0;
}
#if DEFAULT_IO_ENGINE == IO_ENGINE_MMAP
for (int32_t i = 0; i < pRuntimeEnv->numOfFiles; ++i) {
resetMMapWindow(&pRuntimeEnv->pVnodeFiles[i]);
}
#endif
SPointInterpoSupporter pointInterpSupporter = {0};
pointInterpSupporterInit(pQuery, &pointInterpSupporter);
if (!normalizedFirstQueryRange(dataInDisk, dataInCache, pSupporter, &pointInterpSupporter, NULL)) {
pointInterpSupporterDestroy(&pointInterpSupporter);
return 0;
}
/*
* here we set the value for before and after the specified time into the
* parameter for interpolation query
*/
pointInterpSupporterSetData(pQInfo, &pointInterpSupporter);
pointInterpSupporterDestroy(&pointInterpSupporter);
vnodeScanAllData(pRuntimeEnv);
// first/last_row query, do not invoke the finalize for super table query
doFinalizeResult(pRuntimeEnv);
int64_t numOfRes = getNumOfResult(pRuntimeEnv);
assert(numOfRes == 1 || numOfRes == 0);
// accumulate the point interpolation result
if (numOfRes > 0) {
pQuery->pointsRead += numOfRes;
forwardCtxOutputBuf(pRuntimeEnv, numOfRes);
}
return numOfRes;
}
/**
* super table query handler
* 1. super table projection query, group-by on normal columns query, ts-comp query
* 2. point interpolation query, last row query
*
* @param pQInfo
*/
static void vnodeSTableSeqProcessor(SQInfo *pQInfo) {
STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter;
SMeterSidExtInfo **pMeterSidExtInfo = pSupporter->pMeterSidExtInfo;
SQueryRuntimeEnv * pRuntimeEnv = &pSupporter->runtimeEnv;
SQuery * pQuery = &pQInfo->query;
tSidSet *pSids = pSupporter->pSidSet;
int32_t vid = getMeterObj(pSupporter->pMetersHashTable, pMeterSidExtInfo[0]->sid)->vnode;
if (isPointInterpoQuery(pQuery)) {
resetCtxOutputBuf(pRuntimeEnv);
assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0);
while (pSupporter->subgroupIdx < pSids->numOfSubSet) {
int32_t start = pSids->starterPos[pSupporter->subgroupIdx];
int32_t end = pSids->starterPos[pSupporter->subgroupIdx + 1] - 1;
if (isFirstLastRowQuery(pQuery)) {
dTrace("QInfo:%p last_row query on vid:%d, numOfGroups:%d, current group:%d", pQInfo, vid, pSids->numOfSubSet,
pSupporter->subgroupIdx);
TSKEY key = -1;
int32_t index = -1;
// choose the last key for one group
pSupporter->meterIdx = start;
for (int32_t k = start; k <= end; ++k, pSupporter->meterIdx++) {
if (isQueryKilled(pQInfo)) {
setQueryStatus(pQuery, QUERY_NO_DATA_TO_CHECK);
return;
}
// get the last key of meters that belongs to this group
SMeterObj *pMeterObj = getMeterObj(pSupporter->pMetersHashTable, pMeterSidExtInfo[k]->sid);
if (pMeterObj != NULL) {
if (key < pMeterObj->lastKey) {
key = pMeterObj->lastKey;
index = k;
}
}
}
pQuery->skey = key;
pQuery->ekey = key;
pSupporter->rawSKey = key;
pSupporter->rawEKey = key;
int64_t num = doCheckMetersInGroup(pQInfo, index, start);
assert(num >= 0);
} else {
dTrace("QInfo:%p interp query on vid:%d, numOfGroups:%d, current group:%d", pQInfo, vid, pSids->numOfSubSet,
pSupporter->subgroupIdx);
for (int32_t k = start; k <= end; ++k) {
if (isQueryKilled(pQInfo)) {
setQueryStatus(pQuery, QUERY_NO_DATA_TO_CHECK);
return;
}
pQuery->skey = pSupporter->rawSKey;
pQuery->ekey = pSupporter->rawEKey;
int64_t num = doCheckMetersInGroup(pQInfo, k, start);
if (num == 1) {
break;
}
}
}
pSupporter->subgroupIdx++;
// output buffer is full, return to client
if (pQuery->pointsRead >= pQuery->pointsToRead) {
break;
}
}
} else {
/*
* 1. super table projection query, 2. group-by on normal columns query, 3. ts-comp query
*/
assert(pSupporter->meterIdx >= 0);
/*
* if the subgroup index is larger than 0, results generated by group by tbname already exist;
* we need to return them to the client first.
*/
if (pSupporter->subgroupIdx > 0) {
copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
pQInfo->pointsRead += pQuery->pointsRead;
if (pQuery->pointsRead > 0) {
return;
}
}
if (pSupporter->meterIdx >= pSids->numOfSids) {
return;
}
resetCtxOutputBuf(pRuntimeEnv);
resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo);
while (pSupporter->meterIdx < pSupporter->numOfMeters) {
int32_t k = pSupporter->meterIdx;
if (isQueryKilled(pQInfo)) {
setQueryStatus(pQuery, QUERY_NO_DATA_TO_CHECK);
return;
}
TSKEY skey = pQInfo->pTableQuerySupporter->pMeterSidExtInfo[k]->key;
if (skey > 0) {
pQuery->skey = skey;
}
bool dataInDisk = true;
bool dataInCache = true;
if (!multimeterMultioutputHelper(pQInfo, &dataInDisk, &dataInCache, k, 0)) {
pQuery->skey = pSupporter->rawSKey;
pQuery->ekey = pSupporter->rawEKey;
pSupporter->meterIdx++;
continue;
}
#if DEFAULT_IO_ENGINE == IO_ENGINE_MMAP
for (int32_t i = 0; i < pRuntimeEnv->numOfFiles; ++i) {
resetMMapWindow(&pRuntimeEnv->pVnodeFiles[i]);
}
#endif
SPointInterpoSupporter pointInterpSupporter = {0};
if (normalizedFirstQueryRange(dataInDisk, dataInCache, pSupporter, &pointInterpSupporter, NULL) == false) {
pQuery->skey = pSupporter->rawSKey;
pQuery->ekey = pSupporter->rawEKey;
pSupporter->meterIdx++;
continue;
}
// TODO handle the limit problem
if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) {
forwardQueryStartPosition(pRuntimeEnv);
if (Q_STATUS_EQUAL(pQuery->over, QUERY_NO_DATA_TO_CHECK | QUERY_COMPLETED)) {
pQuery->skey = pSupporter->rawSKey;
pQuery->ekey = pSupporter->rawEKey;
pSupporter->meterIdx++;
continue;
}
}
vnodeScanAllData(pRuntimeEnv);
pQuery->pointsRead = getNumOfResult(pRuntimeEnv);
doSkipResults(pRuntimeEnv);
// the limitation of output result is reached, set the query completed
if (doRevisedResultsByLimit(pQInfo)) {
pSupporter->meterIdx = pSupporter->pSidSet->numOfSids;
break;
}
// enable execution for next table, when handling the projection query
enableExecutionForNextTable(pRuntimeEnv);
if (Q_STATUS_EQUAL(pQuery->over, QUERY_NO_DATA_TO_CHECK | QUERY_COMPLETED)) {
/*
* the query range is identical for all meters involved in the query, so it needs to be restored
* at the *beginning* of the query on each meter, not when the query on a meter is aborted due to
* the buffer limitation. To ensure that, we reset the query range once the query on a meter is completed.
*/
pQuery->skey = pSupporter->rawSKey;
pQuery->ekey = pSupporter->rawEKey;
pSupporter->meterIdx++;
pQInfo->pTableQuerySupporter->pMeterSidExtInfo[k]->key = pQuery->lastKey;
// if the buffer is full or group by each table, we need to jump out of the loop
if (Q_STATUS_EQUAL(pQuery->over, QUERY_RESBUF_FULL) ||
isGroupbyEachTable(pQuery->pGroupbyExpr, pSupporter->pSidSet)) {
break;
}
} else { // forward query range
pQuery->skey = pQuery->lastKey;
// all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter
if (pQuery->pointsRead == 0) {
assert(!Q_STATUS_EQUAL(pQuery->over, QUERY_RESBUF_FULL));
continue;
} else {
pQInfo->pTableQuerySupporter->pMeterSidExtInfo[k]->key = pQuery->lastKey;
// buffer is full, wait for the next round to retrieve data from current meter
assert(Q_STATUS_EQUAL(pQuery->over, QUERY_RESBUF_FULL));
break;
}
}
}
}
/*
* 1. super table projection query, group-by on normal columns query, ts-comp query
* 2. point interpolation query, last row query
*
* group-by on normal columns query and last_row query do NOT invoke the finalizer here,
* since the finalize stage will be done at the client side.
*
* projection query, point interpolation query do not need the finalizer.
*
* Only the ts-comp query requires the finalizer function to be executed here.
*/
if (isTSCompQuery(pQuery)) {
doFinalizeResult(pRuntimeEnv);
}
if (pRuntimeEnv->pTSBuf != NULL) {
pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur;
}
// todo refactor
if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {
SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo;
for (int32_t i = 0; i < pWindowResInfo->size; ++i) {
SWindowStatus *pStatus = &pWindowResInfo->pResult[i].status;
pStatus->closed = true; // enable return all results for group by normal columns
SWindowResult *pResult = &pWindowResInfo->pResult[i];
for (int32_t j = 0; j < pQuery->numOfOutputCols; ++j) {
pResult->numOfRows = MAX(pResult->numOfRows, pResult->resultInfo[j].numOfRes);
}
}
pQInfo->pTableQuerySupporter->subgroupIdx = 0;
pQuery->pointsRead = 0;
copyFromWindowResToSData(pQInfo, pWindowResInfo->pResult);
}
pQInfo->pointsRead += pQuery->pointsRead;
pQuery->pointsOffset = pQuery->pointsToRead;
dTrace(
"QInfo %p vid:%d, numOfMeters:%d, index:%d, numOfGroups:%d, %d points returned, totalRead:%d totalReturn:%d,"
"next skey:%" PRId64 ", offset:%" PRId64,
pQInfo, vid, pSids->numOfSids, pSupporter->meterIdx, pSids->numOfSubSet, pQuery->pointsRead, pQInfo->pointsRead,
pQInfo->pointsReturned, pQuery->skey, pQuery->limit.offset);
}
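/*
 * Scan the disk files and the cache in timestamp order. Data in the cache is newer than data that
 * has already been flushed to the files, so an ascending query checks the files first and then the
 * cache, while a descending query checks the cache first and then the files.
 */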
static void doOrderedScan(SQInfo *pQInfo) {
STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter;
SQuery * pQuery = &pQInfo->query;
if (QUERY_IS_ASC_QUERY(pQuery)) {
queryOnMultiDataFiles(pQInfo, pSupporter->pMeterDataInfo);
if (pQInfo->code != TSDB_CODE_SUCCESS) {
return;
}
queryOnMultiDataCache(pQInfo, pSupporter->pMeterDataInfo);
} else {
queryOnMultiDataCache(pQInfo, pSupporter->pMeterDataInfo);
if (pQInfo->code != TSDB_CODE_SUCCESS) {
return;
}
queryOnMultiDataFiles(pQInfo, pSupporter->pMeterDataInfo);
}
}
static void setupMeterQueryInfoForSupplementQuery(STableQuerySupportObj *pSupporter) {
SQuery *pQuery = pSupporter->runtimeEnv.pQuery;
for (int32_t i = 0; i < pSupporter->numOfMeters; ++i) {
SMeterQueryInfo *pMeterQueryInfo = pSupporter->pMeterDataInfo[i].pMeterQInfo;
changeMeterQueryInfoForSuppleQuery(pQuery, pMeterQueryInfo, pSupporter->rawSKey, pSupporter->rawEKey);
}
}
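/*
 * Supplementary (reverse-order) scan for the super table query: for functions that require a second
 * pass (see needSupplementaryScan/disableFunctForSuppleScan), the scan is re-run with the query order
 * reversed. The raw start/end keys are swapped and the ts-comp buffer cursor order is flipped for the
 * scan, then both are restored; note that the per-meter query info is NOT reset to its original state
 * (see the comment below).
 */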
static void doMultiMeterSupplementaryScan(SQInfo *pQInfo) {
STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter;
SQueryRuntimeEnv *pRuntimeEnv = &pSupporter->runtimeEnv;
SQuery * pQuery = &pQInfo->query;
if (!needSupplementaryScan(pQuery)) {
dTrace("QInfo:%p no need to do supplementary scan, query completed", pQInfo);
return;
}
SET_SUPPLEMENT_SCAN_FLAG(pRuntimeEnv);
disableFunctForSuppleScan(pSupporter, pQuery->order.order);
if (pRuntimeEnv->pTSBuf != NULL) {
pRuntimeEnv->pTSBuf->cur.order = pRuntimeEnv->pTSBuf->cur.order ^ 1u;
}
SWAP(pSupporter->rawSKey, pSupporter->rawEKey, TSKEY);
setupMeterQueryInfoForSupplementQuery(pSupporter);
int64_t st = taosGetTimestampMs();
doOrderedScan(pQInfo);
int64_t et = taosGetTimestampMs();
dTrace("QInfo:%p supplementary scan completed, elapsed time: %lldms", pQInfo, et - st);
/*
* restore the env
* the meter query info is not reset to the original state
*/
SWAP(pSupporter->rawSKey, pSupporter->rawEKey, TSKEY);
enableFunctForMasterScan(pRuntimeEnv, pQuery->order.order);
if (pRuntimeEnv->pTSBuf != NULL) {
pRuntimeEnv->pTSBuf->cur.order = pRuntimeEnv->pTSBuf->cur.order ^ 1;
}
SET_MASTER_SCAN_FLAG(pRuntimeEnv);
}
static void vnodeMultiMeterQueryProcessor(SQInfo *pQInfo) {
STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter;
SQueryRuntimeEnv * pRuntimeEnv = &pSupporter->runtimeEnv;
SQuery * pQuery = &pQInfo->query;
if (pSupporter->subgroupIdx > 0) {
/*
* if the subgroupIdx > 0, the query process must have been completed already; we only need to
* copy the data into the output buffer
*/
if (pQuery->intervalTime > 0) {
copyResToQueryResultBuf(pSupporter, pQuery);
#ifdef _DEBUG_VIEW
displayInterResult(pQuery->sdata, pQuery, pQuery->sdata[0]->len);
#endif
} else {
copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
}
pQInfo->pointsRead += pQuery->pointsRead;
if (pQuery->pointsRead == 0) {
vnodePrintQueryStatistics(pSupporter);
}
dTrace("QInfo:%p points returned:%d, totalRead:%d totalReturn:%d", pQInfo, pQuery->pointsRead, pQInfo->pointsRead,
pQInfo->pointsReturned);
return;
}
pSupporter->pMeterDataInfo = (SMeterDataInfo *)calloc(1, sizeof(SMeterDataInfo) * pSupporter->numOfMeters);
if (pSupporter->pMeterDataInfo == NULL) {
dError("QInfo:%p failed to allocate memory, %s", pQInfo, strerror(errno));
pQInfo->code = -TSDB_CODE_SERV_OUT_OF_MEMORY;
return;
}
dTrace("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, group:%d", pQInfo, pSupporter->rawSKey,
pSupporter->rawEKey, pQuery->order.order, pSupporter->pSidSet->numOfSubSet);
dTrace("QInfo:%p main query scan start", pQInfo);
int64_t st = taosGetTimestampMs();
doOrderedScan(pQInfo);
int64_t et = taosGetTimestampMs();
dTrace("QInfo:%p main scan completed, elapsed time: %lldms, supplementary scan start, order:%d", pQInfo, et - st,
pQuery->order.order ^ 1u);
if (pQuery->intervalTime > 0) {
for (int32_t i = 0; i < pSupporter->numOfMeters; ++i) {
SMeterQueryInfo *pMeterQueryInfo = pSupporter->pMeterDataInfo[i].pMeterQInfo;
closeAllTimeWindow(&pMeterQueryInfo->windowResInfo);
}
} else { // close results for group result
closeAllTimeWindow(&pRuntimeEnv->windowResInfo);
}
doMultiMeterSupplementaryScan(pQInfo);
if (isQueryKilled(pQInfo)) {
dTrace("QInfo:%p query killed, abort", pQInfo);
return;
}
if (pQuery->intervalTime > 0 || isSumAvgRateQuery(pQuery)) {
assert(pSupporter->subgroupIdx == 0 && pSupporter->numOfGroupResultPages == 0);
if (mergeMetersResultToOneGroups(pSupporter) == TSDB_CODE_SUCCESS) {
copyResToQueryResultBuf(pSupporter, pQuery);
#ifdef _DEBUG_VIEW
displayInterResult(pQuery->sdata, pQuery, pQuery->sdata[0]->len);
#endif
}
} else { // not an interval query
copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
}
// handle the limitation of output buffer
pQInfo->pointsRead += pQuery->pointsRead;
dTrace("QInfo:%p points returned:%d, totalRead:%d totalReturn:%d", pQInfo, pQuery->pointsRead, pQInfo->pointsRead,
pQInfo->pointsReturned);
}
/*
* in each query, this function will be called only once, no retry for further result.
*
* select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a];
* select count(*) from table_name group by status_column;
*/
static void vnodeSingleTableFixedOutputProcessor(SQInfo *pQInfo) {
SQuery * pQuery = &pQInfo->query;
SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->pTableQuerySupporter->runtimeEnv;
assert(pQuery->slot >= 0 && pQuery->pos >= 0);
vnodeScanAllData(pRuntimeEnv);
doFinalizeResult(pRuntimeEnv);
if (isQueryKilled(pQInfo)) {
return;
}
// the numOfOutputElems must be identical for all sql functions that are allowed to be executed simultaneously.
pQuery->pointsRead = getNumOfResult(pRuntimeEnv);
assert(pQuery->pointsRead <= pQuery->pointsToRead &&
Q_STATUS_EQUAL(pQuery->over, QUERY_COMPLETED | QUERY_NO_DATA_TO_CHECK));
// must be top/bottom query if offset > 0
if (pQuery->limit.offset > 0) {
assert(isTopBottomQuery(pQuery));
}
doSkipResults(pRuntimeEnv);
doRevisedResultsByLimit(pQInfo);
pQInfo->pointsRead = pQuery->pointsRead;
}
static void vnodeSingleTableMultiOutputProcessor(SQInfo *pQInfo) {
SQuery * pQuery = &pQInfo->query;
SMeterObj *pMeterObj = pQInfo->pObj;
SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->pTableQuerySupporter->runtimeEnv;
// for ts_comp query, re-initialization is not allowed
if (!isTSCompQuery(pQuery)) {
resetCtxOutputBuf(pRuntimeEnv);
}
while (1) {
vnodeScanAllData(pRuntimeEnv);
doFinalizeResult(pRuntimeEnv);
if (isQueryKilled(pQInfo)) {
return;
}
pQuery->pointsRead = getNumOfResult(pRuntimeEnv);
if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols > 0 && pQuery->pointsRead > 0) {
doSkipResults(pRuntimeEnv);
}
/*
* 1. if pQuery->pointsRead == 0, pQuery->limit.offset >= 0, still need to check data
* 2. if pQuery->pointsRead > 0, pQuery->limit.offset must be 0
*/
if (pQuery->pointsRead > 0 || Q_STATUS_EQUAL(pQuery->over, QUERY_COMPLETED | QUERY_NO_DATA_TO_CHECK)) {
break;
}
TSKEY nextTimestamp = loadRequiredBlockIntoMem(pRuntimeEnv, &pRuntimeEnv->nextPos);
assert(nextTimestamp > 0 || ((nextTimestamp < 0) && Q_STATUS_EQUAL(pQuery->over, QUERY_NO_DATA_TO_CHECK)));
dTrace("QInfo:%p vid:%d sid:%d id:%s, skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64,
pQInfo, pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->limit.offset, pQuery->lastKey,
pQuery->ekey);
resetCtxOutputBuf(pRuntimeEnv);
}
doRevisedResultsByLimit(pQInfo);
pQInfo->pointsRead += pQuery->pointsRead;
if (Q_STATUS_EQUAL(pQuery->over, QUERY_RESBUF_FULL)) {
TSKEY nextTimestamp = loadRequiredBlockIntoMem(pRuntimeEnv, &pRuntimeEnv->nextPos);
assert(nextTimestamp > 0 || ((nextTimestamp < 0) && Q_STATUS_EQUAL(pQuery->over, QUERY_NO_DATA_TO_CHECK)));
dTrace("QInfo:%p vid:%d sid:%d id:%s, query abort due to buffer limitation, next qrange:%" PRId64 "-%" PRId64,
pQInfo, pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->lastKey, pQuery->ekey);
}
dTrace("QInfo:%p vid:%d sid:%d id:%s, %d points returned, totalRead:%d totalReturn:%d", pQInfo, pMeterObj->vnode,
pMeterObj->sid, pMeterObj->meterId, pQuery->pointsRead, pQInfo->pointsRead, pQInfo->pointsReturned);
pQuery->pointsOffset = pQuery->pointsToRead; // restore the available buffer
if (!isTSCompQuery(pQuery)) {
assert(pQuery->pointsRead <= pQuery->pointsToRead);
}
}
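/*
 * Main loop of the interval query on a single table: repeatedly scan the currently loaded data,
 * finalize the generated time windows, discard closed windows that fall within limit.offset (only
 * when filter columns or a ts buffer are present and no interpolation is requested), and load the
 * next data block until the query completes or the result buffer becomes full.
 */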
static void vnodeSingleMeterIntervalMainLooper(STableQuerySupportObj *pSupporter, SQueryRuntimeEnv *pRuntimeEnv) {
SQuery *pQuery = pRuntimeEnv->pQuery;
SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pQuery);  // pQInfo is not passed in; recover it from pQuery for the kill check below
while (1) {
initCtxOutputBuf(pRuntimeEnv);
vnodeScanAllData(pRuntimeEnv);
if (isQueryKilled(pQInfo)) {
return;
}
assert(!Q_STATUS_EQUAL(pQuery->over, QUERY_NOT_COMPLETED));
doFinalizeResult(pRuntimeEnv);
// here we can ignore the records in case of no interpolation
// todo handle offset, in case of top/bottom interval query
if ((pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) && pQuery->limit.offset > 0 &&
pQuery->interpoType == TSDB_INTERPO_NONE) {
// maxOutput <= 0, means current query does not generate any results
int32_t numOfClosed = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo);
int32_t c = MIN(numOfClosed, pQuery->limit.offset);
clearFirstNTimeWindow(pRuntimeEnv, c);
pQuery->limit.offset -= c;
}
if (Q_STATUS_EQUAL(pQuery->over, QUERY_NO_DATA_TO_CHECK | QUERY_COMPLETED)) {
break;
}
// load the data block for the next retrieve
loadRequiredBlockIntoMem(pRuntimeEnv, &pRuntimeEnv->nextPos);
if (Q_STATUS_EQUAL(pQuery->over, QUERY_RESBUF_FULL)) {
break;
}
}
}
/* handle time interval query on single table */
static void vnodeSingleTableIntervalProcessor(SQInfo *pQInfo) {
SQuery * pQuery = &(pQInfo->query);
SMeterObj *pMeterObj = pQInfo->pObj;
STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter;
SQueryRuntimeEnv * pRuntimeEnv = &pSupporter->runtimeEnv;
int32_t numOfInterpo = 0;
while (1) {
resetCtxOutputBuf(pRuntimeEnv);
vnodeSingleMeterIntervalMainLooper(pSupporter, pRuntimeEnv);
if (pQuery->intervalTime > 0) {
pSupporter->subgroupIdx = 0; // always start from 0
pQuery->pointsRead = 0;
copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
clearFirstNTimeWindow(pRuntimeEnv, pSupporter->subgroupIdx);
}
// the offset is handled at prepare stage if no interpolation involved
if (pQuery->interpoType == TSDB_INTERPO_NONE) {
doRevisedResultsByLimit(pQInfo);
break;
} else {
taosInterpoSetStartInfo(&pRuntimeEnv->interpoInfo, pQuery->pointsRead, pQuery->interpoType);
SData **pInterpoBuf = pRuntimeEnv->pInterpoBuf;
for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) {
memcpy(pInterpoBuf[i]->data, pQuery->sdata[i]->data, pQuery->pointsRead * pQuery->pSelectExpr[i].resBytes);
}
numOfInterpo = 0;
pQuery->pointsRead = vnodeQueryResultInterpolate(pQInfo, (tFilePage **)pQuery->sdata, (tFilePage **)pInterpoBuf,
pQuery->pointsRead, &numOfInterpo);
dTrace("QInfo: %p interpo completed, final:%d", pQInfo, pQuery->pointsRead);
if (pQuery->pointsRead > 0 || Q_STATUS_EQUAL(pQuery->over, QUERY_COMPLETED | QUERY_NO_DATA_TO_CHECK)) {
doRevisedResultsByLimit(pQInfo);
break;
}
// no result generated yet, continue to retrieve data
pQuery->pointsRead = 0;
}
}
// all data scanned, the group by normal column can return
if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) {//todo refactor with merge interval time result
pSupporter->subgroupIdx = 0;
pQuery->pointsRead = 0;
copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
clearFirstNTimeWindow(pRuntimeEnv, pSupporter->subgroupIdx);
}
pQInfo->pointsRead += pQuery->pointsRead;
pQInfo->pointsInterpo += numOfInterpo;
dTrace("%p vid:%d sid:%d id:%s, %d points returned %d points interpo, totalRead:%d totalInterpo:%d totalReturn:%d",
pQInfo, pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->pointsRead, numOfInterpo,
pQInfo->pointsRead - pQInfo->pointsInterpo, pQInfo->pointsInterpo, pQInfo->pointsReturned);
}
void vnodeSingleTableQuery(SSchedMsg *pMsg) {
SQInfo *pQInfo = (SQInfo *)pMsg->ahandle;
if (pQInfo == NULL || pQInfo->pTableQuerySupporter == NULL) {
dTrace("%p freed abort query", pQInfo);
return;
}
if (pQInfo->killed) {
dTrace("QInfo:%p it is already killed, abort", pQInfo);
vnodeDecRefCount(pQInfo);
return;
}
assert(pQInfo->refCount >= 1);
SQuery * pQuery = &pQInfo->query;
SMeterObj * pMeterObj = pQInfo->pObj;
STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter;
SQueryRuntimeEnv * pRuntimeEnv = &pSupporter->runtimeEnv;
assert(pRuntimeEnv->pMeterObj == pMeterObj);
dTrace("vid:%d sid:%d id:%s, query thread is created, numOfQueries:%d, QInfo:%p", pMeterObj->vnode, pMeterObj->sid,
pMeterObj->meterId, pMeterObj->numOfQueries, pQInfo);
if (vnodeHasRemainResults(pQInfo)) {
/*
* There are remaining results that have not been returned due to result interpolation.
* So we stay in this procedure instead of launching the retrieve procedure for the next results.
*/
int32_t numOfInterpo = 0;
int32_t remain = taosNumOfRemainPoints(&pRuntimeEnv->interpoInfo);
pQuery->pointsRead = vnodeQueryResultInterpolate(pQInfo, (tFilePage **)pQuery->sdata,
(tFilePage **)pRuntimeEnv->pInterpoBuf, remain, &numOfInterpo);
doRevisedResultsByLimit(pQInfo);
pQInfo->pointsInterpo += numOfInterpo;
pQInfo->pointsRead += pQuery->pointsRead;
dTrace(
"QInfo:%p vid:%d sid:%d id:%s, %d points returned %d points interpo, totalRead:%d totalInterpo:%d "
"totalReturn:%d",
pQInfo, pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->pointsRead, numOfInterpo,
pQInfo->pointsRead, pQInfo->pointsInterpo, pQInfo->pointsReturned);
sem_post(&pQInfo->dataReady);
vnodeDecRefCount(pQInfo);
return;
}
// here we have scanned all qualified data in both data files and cache
if (Q_STATUS_EQUAL(pQuery->over, QUERY_NO_DATA_TO_CHECK | QUERY_COMPLETED)) {
// continue to push data from the group result
if (isGroupbyNormalCol(pQuery->pGroupbyExpr) ||
(pQuery->intervalTime > 0 && pQInfo->pointsReturned < pQuery->limit.limit)) {
//todo limit the output for interval query?
pQuery->pointsRead = 0;
pSupporter->subgroupIdx = 0; // always start from 0
if (pRuntimeEnv->windowResInfo.size > 0) {
copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult);
pQInfo->pointsRead += pQuery->pointsRead;
clearFirstNTimeWindow(pRuntimeEnv, pSupporter->subgroupIdx);
if (pQuery->pointsRead > 0) {
dTrace("QInfo:%p vid:%d sid:%d id:%s, %d points returned %d from group results, totalRead:%d totalReturn:%d",
pQInfo, pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->pointsRead, pQInfo->pointsRead,
pQInfo->pointsInterpo, pQInfo->pointsReturned);
sem_post(&pQInfo->dataReady);
vnodeDecRefCount(pQInfo);
return;
}
}
}
pQInfo->over = 1;
dTrace("QInfo:%p vid:%d sid:%d id:%s, query over, %d points are returned", pQInfo, pMeterObj->vnode, pMeterObj->sid,
pMeterObj->meterId, pQInfo->pointsRead);
vnodePrintQueryStatistics(pSupporter);
sem_post(&pQInfo->dataReady);
vnodeDecRefCount(pQInfo);
return;
}
/* number of points returned during this query */
pQuery->pointsRead = 0;
assert(pQuery->pos >= 0 && pQuery->slot >= 0);
int64_t st = taosGetTimestampUs();
// group by normal column, sliding window and interval queries are handled by the interval query processor
if (pQuery->intervalTime != 0 || isGroupbyNormalCol(pQuery->pGroupbyExpr)) { // interval (down sampling operation)
assert(pQuery->checkBufferInLoop == 0 && pQuery->pointsOffset == pQuery->pointsToRead);
vnodeSingleTableIntervalProcessor(pQInfo);
} else {
if (isFixedOutputQuery(pQuery)) {
assert(pQuery->checkBufferInLoop == 0);
vnodeSingleTableFixedOutputProcessor(pQInfo);
} else { // diff/add/multiply/subtract/division
assert(pQuery->checkBufferInLoop == 1);
vnodeSingleTableMultiOutputProcessor(pQInfo);
}
}
// record the total elapsed time
pQInfo->useconds += (taosGetTimestampUs() - st);
/* check if query is killed or not */
if (isQueryKilled(pQInfo)) {
dTrace("QInfo:%p query is killed", pQInfo);
pQInfo->over = 1;
} else {
dTrace("QInfo:%p vid:%d sid:%d id:%s, meter query thread completed, %d points are returned", pQInfo,
pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->pointsRead);
}
sem_post(&pQInfo->dataReady);
vnodeDecRefCount(pQInfo);
}
void vnodeMultiMeterQuery(SSchedMsg *pMsg) {
SQInfo *pQInfo = (SQInfo *)pMsg->ahandle;
if (pQInfo == NULL || pQInfo->pTableQuerySupporter == NULL) {
return;
}
if (pQInfo->killed) {
vnodeDecRefCount(pQInfo);
dTrace("QInfo:%p it is already killed, abort", pQInfo);
return;
}
assert(pQInfo->refCount >= 1);
SQuery *pQuery = &pQInfo->query;
pQuery->pointsRead = 0;
int64_t st = taosGetTimestampUs();
if (pQuery->intervalTime > 0 ||
(isFixedOutputQuery(pQuery) && (!isPointInterpoQuery(pQuery)) && !isGroupbyNormalCol(pQuery->pGroupbyExpr))) {
assert(pQuery->checkBufferInLoop == 0);
vnodeMultiMeterQueryProcessor(pQInfo);
} else {
assert((pQuery->checkBufferInLoop == 1 && pQuery->intervalTime == 0) || isPointInterpoQuery(pQuery) ||
isGroupbyNormalCol(pQuery->pGroupbyExpr));
vnodeSTableSeqProcessor(pQInfo);
}
/* record the total elapsed time */
pQInfo->useconds += (taosGetTimestampUs() - st);
pQInfo->over = isQueryKilled(pQInfo) ? 1 : 0;
taosInterpoSetStartInfo(&pQInfo->pTableQuerySupporter->runtimeEnv.interpoInfo, pQuery->pointsRead,
pQInfo->query.interpoType);
STableQuerySupportObj *pSupporter = pQInfo->pTableQuerySupporter;
if (pQuery->pointsRead == 0) {
pQInfo->over = 1;
dTrace("QInfo:%p over, %d meters queried, %d points are returned", pQInfo, pSupporter->numOfMeters,
pQInfo->pointsRead);
vnodePrintQueryStatistics(pSupporter);
}
sem_post(&pQInfo->dataReady);
vnodeDecRefCount(pQInfo);
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "hash.h"
#include "hashfunc.h"
#include "ihash.h"
#include "qast.h"
#include "qextbuffer.h"
#include "taosmsg.h"
#include "tscJoinProcess.h"
#include "tscompression.h"
#include "vnode.h"
#include "vnodeRead.h"
#include "vnodeUtil.h"
int (*pQueryFunc[])(SMeterObj *, SQuery *) = {vnodeQueryFromCache, vnodeQueryFromFile};
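/*
 * Interpolation search over the sorted timestamp list of a block. The probe position is estimated
 * from the key distribution instead of always probing the midpoint:
 *
 *   midPos = (key - keyList[firstPos]) / (keyList[lastPos] - keyList[firstPos]) * numOfPoints + firstPos
 *
 * (evaluated in integer arithmetic, as in the code below). With order == 0 it returns the last
 * position whose key is <= the given key (firstPos - 1, possibly -1, when the key precedes all
 * entries); otherwise it returns the first position whose key is >= the given key, or -1 when the
 * key follows all entries.
 */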
int vnodeInterpolationSearchKey(char *pValue, int num, TSKEY key, int order) {
int firstPos, lastPos, midPos = -1;
int delta, numOfPoints;
TSKEY *keyList;
keyList = (TSKEY *)pValue;
firstPos = 0;
lastPos = num - 1;
if (order == 0) {
// from latest to oldest
while (1) {
if (key >= keyList[lastPos]) return lastPos;
if (key == keyList[firstPos]) return firstPos;
if (key < keyList[firstPos]) return firstPos - 1;
numOfPoints = lastPos - firstPos + 1;
delta = keyList[lastPos] - keyList[firstPos];
midPos = (key - keyList[firstPos]) / delta * numOfPoints + firstPos;
if (key < keyList[midPos]) {
lastPos = midPos - 1;
} else if (key > keyList[midPos]) {
firstPos = midPos + 1;
} else {
break;
}
}
} else {
// from oldest to latest
while (1) {
if (key <= keyList[firstPos]) return firstPos;
if (key == keyList[lastPos]) return lastPos;
if (key > keyList[lastPos]) {
lastPos = lastPos + 1;
if (lastPos >= num) return -1;
}
numOfPoints = lastPos - firstPos + 1;
delta = keyList[lastPos] - keyList[firstPos];
midPos = (key - keyList[firstPos]) / delta * numOfPoints + firstPos;
if (key < keyList[midPos]) {
lastPos = midPos - 1;
} else if (key > keyList[midPos]) {
firstPos = midPos + 1;
} else {
break;
}
}
}
return midPos;
}
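/*
 * Binary search with the same contract as vnodeInterpolationSearchKey:
 * order == 0  -> index of the last key <= the given key, or -1 if the key precedes all entries;
 * order != 0  -> index of the first key >= the given key, or -1 if the key follows all entries.
 * Returns -1 for an empty list.
 */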
int vnodeBinarySearchKey(char *pValue, int num, TSKEY key, int order) {
int firstPos, lastPos, midPos = -1;
int numOfPoints;
TSKEY *keyList;
if (num <= 0) return -1;
keyList = (TSKEY *)pValue;
firstPos = 0;
lastPos = num - 1;
if (order == 0) {
// find the first position which is smaller than the key
while (1) {
if (key >= keyList[lastPos]) return lastPos;
if (key == keyList[firstPos]) return firstPos;
if (key < keyList[firstPos]) return firstPos - 1;
numOfPoints = lastPos - firstPos + 1;
midPos = (numOfPoints >> 1) + firstPos;
if (key < keyList[midPos]) {
lastPos = midPos - 1;
} else if (key > keyList[midPos]) {
firstPos = midPos + 1;
} else {
break;
}
}
} else {
// find the first position which is bigger than the key
while (1) {
if (key <= keyList[firstPos]) return firstPos;
if (key == keyList[lastPos]) return lastPos;
if (key > keyList[lastPos]) {
lastPos = lastPos + 1;
if (lastPos >= num)
return -1;
else
return lastPos;
}
numOfPoints = lastPos - firstPos + 1;
midPos = (numOfPoints >> 1) + firstPos;
if (key < keyList[midPos]) {
lastPos = midPos - 1;
} else if (key > keyList[midPos]) {
firstPos = midPos + 1;
} else {
break;
}
}
}
return midPos;
}
int (*vnodeSearchKeyFunc[])(char *pValue, int num, TSKEY key, int order) = {vnodeBinarySearchKey,
vnodeInterpolationSearchKey};
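/*
 * The per-meter searchAlgorithm field selects which of the two search routines above is used when
 * locating a timestamp inside a block (see vnodeSearchKeyFunc[pMeterObj->searchAlgorithm] in the
 * scan routines).
 */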
static SQInfo *vnodeAllocateQInfoCommon(SQueryMeterMsg *pQueryMsg, SMeterObj *pMeterObj, SSqlFunctionExpr *pExprs) {
SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo));
if (pQInfo == NULL) {
return NULL;
}
SQuery *pQuery = &(pQInfo->query);
SColumnInfo *colList = pQueryMsg->colList;
short numOfCols = pQueryMsg->numOfCols;
short numOfOutputCols = pQueryMsg->numOfOutputCols;
pQuery->numOfCols = numOfCols;
pQuery->numOfOutputCols = numOfOutputCols;
pQuery->limit.limit = pQueryMsg->limit;
pQuery->limit.offset = pQueryMsg->offset;
pQuery->order.order = pQueryMsg->order;
pQuery->order.orderColId = pQueryMsg->orderColId;
pQuery->colList = calloc(1, sizeof(SSingleColumnFilterInfo) * numOfCols);
if (pQuery->colList == NULL) {
goto _clean_memory;
}
for (int16_t i = 0; i < numOfCols; ++i) {
pQuery->colList[i].req[0] = 1; // column required during the master scan of data blocks
pQuery->colList[i].colIdxInBuf = i;
pQuery->colList[i].data = colList[i];
SColumnInfo *pColInfo = &pQuery->colList[i].data;
pColInfo->filters = NULL;
if (colList[i].numOfFilters > 0) {
pColInfo->filters = calloc(1, colList[i].numOfFilters * sizeof(SColumnFilterInfo));
for (int32_t j = 0; j < colList[i].numOfFilters; ++j) {
tscColumnFilterInfoCopy(&pColInfo->filters[j], &colList[i].filters[j]);
}
} else {
pQuery->colList[i].data.filters = NULL;
}
}
vnodeUpdateQueryColumnIndex(pQuery, pMeterObj);
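// Map each output expression onto the column it reads: record the column's position in the query's
// input buffer (colIdxInBuf) and the index resolved by vnodeUpdateQueryColumnIndex (colIdx). The
// req[1] flag apparently marks columns that must also be loaded during the supplementary
// (reverse-order) scan, e.g. first/last evaluated against the opposite scan order, or stddev,
// which needs a second pass.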
for (int16_t col = 0; col < numOfOutputCols; ++col) {
assert(pExprs[col].resBytes > 0);
pQuery->rowSize += pExprs[col].resBytes;
if (TSDB_COL_IS_TAG(pExprs[col].pBase.colInfo.flag)) {
continue;
}
int16_t colId = pExprs[col].pBase.colInfo.colId;
int16_t functId = pExprs[col].pBase.functionId;
// build the projection of actual column data in buffer and the real column index
for (int32_t k = 0; k < numOfCols; ++k) {
if (pQuery->colList[k].data.colId == colId) {
pExprs[col].pBase.colInfo.colIdxInBuf = (int16_t)k;
pExprs[col].pBase.colInfo.colIdx = pQuery->colList[k].colIdx;
if (((functId == TSDB_FUNC_FIRST_DST || functId == TSDB_FUNC_FIRST) && pQuery->order.order == TSQL_SO_DESC) ||
((functId == TSDB_FUNC_LAST_DST || functId == TSDB_FUNC_LAST) && pQuery->order.order == TSQL_SO_ASC)) {
pQuery->colList[k].req[1] = 1;
} else if (functId == TSDB_FUNC_STDDEV) {
pQuery->colList[k].req[1] = 1;
}
break;
}
}
}
pQuery->pSelectExpr = pExprs;
int32_t ret = vnodeCreateFilterInfo(pQInfo, pQuery);
if (ret != TSDB_CODE_SUCCESS) {
goto _clean_memory;
}
vnodeUpdateFilterColumnIndex(pQuery);
pQuery->precision = vnodeList[pMeterObj->vnode].cfg.precision;
return pQInfo;
_clean_memory:
tfree(pQuery->pFilterInfo);
tfree(pQuery->colList);
tfree(pQInfo);
return NULL;
}
static SQInfo *vnodeAllocateQInfoEx(SQueryMeterMsg *pQueryMsg, SSqlGroupbyExpr *pGroupbyExpr, SSqlFunctionExpr *pExprs,
SMeterObj *pMeterObj) {
SQInfo *pQInfo = vnodeAllocateQInfoCommon(pQueryMsg, pMeterObj, pExprs);
if (pQInfo == NULL) {
tfree(pExprs);
tfree(pGroupbyExpr);
return NULL;
}
SQuery *pQuery = &(pQInfo->query);
/* pQuery->sdata is the results output buffer. */
pQuery->sdata = (SData **)calloc(pQuery->numOfOutputCols, sizeof(SData *));
if (pQuery->sdata == NULL) {
goto sign_clean_memory;
}
pQuery->pGroupbyExpr = pGroupbyExpr;
pQuery->intervalTime = pQueryMsg->intervalTime;
pQuery->slidingTime = pQueryMsg->slidingTime;
pQuery->interpoType = pQueryMsg->interpoType;
pQuery->intervalTimeUnit = pQueryMsg->intervalTimeUnit;
pQInfo->query.pointsToRead = vnodeList[pMeterObj->vnode].cfg.rowsInFileBlock;
for (int32_t col = 0; col < pQuery->numOfOutputCols; ++col) {
assert(pExprs[col].interResBytes >= pExprs[col].resBytes);
// allocate additional memory for interResults, which are usually larger than the final results
size_t size = (pQInfo->query.pointsToRead + 1) * pExprs[col].resBytes + pExprs[col].interResBytes + sizeof(SData);
pQuery->sdata[col] = (SData *)calloc(1, size);
if (pQuery->sdata[col] == NULL) {
goto sign_clean_memory;
}
}
if (pQuery->interpoType != TSDB_INTERPO_NONE) {
pQuery->defaultVal = malloc(sizeof(int64_t) * pQuery->numOfOutputCols);
if (pQuery->defaultVal == NULL) {
goto sign_clean_memory;
}
// the first column is the timestamp
memcpy(pQuery->defaultVal, (char *)pQueryMsg->defaultVal, pQuery->numOfOutputCols * sizeof(int64_t));
}
// to make sure third party won't overwrite this structure
pQInfo->signature = (uint64_t)pQInfo;
pQInfo->pObj = pMeterObj;
pQuery->slot = -1;
pQuery->pos = -1;
pQuery->hfd = -1;
pQuery->dfd = -1;
pQuery->lfd = -1;
dTrace("vid:%d sid:%d meterId:%s, QInfo is allocated:%p", pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId,
pQInfo);
return pQInfo;
sign_clean_memory:
tfree(pQuery->defaultVal);
if (pQuery->sdata != NULL) {
for (int16_t col = 0; col < pQuery->numOfOutputCols; ++col) {
tfree(pQuery->sdata[col]);
}
}
tfree(pQuery->sdata);
tfree(pQuery->pFilterInfo);
tfree(pQuery->colList);
tfree(pExprs);
tfree(pGroupbyExpr);
tfree(pQInfo);
return NULL;
}
SQInfo *vnodeAllocateQInfo(SQueryMeterMsg *pQueryMsg, SMeterObj *pObj, SSqlFunctionExpr *pExprs) {
SQInfo *pQInfo = vnodeAllocateQInfoCommon(pQueryMsg, pObj, pExprs);
if (pQInfo == NULL) {
tfree(pExprs);
return NULL;
}
SQuery *pQuery = &(pQInfo->query);
pQuery->sdata = (SData **)calloc(1, sizeof(SData *) * pQuery->numOfOutputCols);
if (pQuery->sdata == NULL) {
goto __clean_memory;
}
size_t size = 0;
int32_t numOfRows = vnodeList[pObj->vnode].cfg.rowsInFileBlock;
for (int col = 0; col < pQuery->numOfOutputCols; ++col) {
size = 2 * (numOfRows * pQuery->pSelectExpr[col].resBytes + sizeof(SData));
pQuery->sdata[col] = (SData *)malloc(size);
if (pQuery->sdata[col] == NULL) {
goto __clean_memory;
}
}
if (pQuery->colList[0].data.colId != PRIMARYKEY_TIMESTAMP_COL_INDEX) {
size = 2 * (numOfRows * TSDB_KEYSIZE + sizeof(SData));
pQuery->tsData = (SData *)malloc(size);
if (pQuery->tsData == NULL) {
goto __clean_memory;
}
}
// to make sure third party won't overwrite this structure
pQInfo->signature = (uint64_t)pQInfo;
pQInfo->pObj = pObj;
pQuery->slot = -1;
pQuery->hfd = -1;
pQuery->dfd = -1;
pQuery->lfd = -1;
pQuery->pos = -1;
pQuery->interpoType = TSDB_INTERPO_NONE;
dTrace("vid:%d sid:%d meterId:%s, QInfo is allocated:%p", pObj->vnode, pObj->sid, pObj->meterId, pQInfo);
return pQInfo;
__clean_memory:
tfree(pQuery->tsData);
if (pQuery->sdata != NULL) {
for (int col = 0; col < pQuery->numOfOutputCols; ++col) {
tfree(pQuery->sdata[col]);
}
}
tfree(pQuery->sdata);
tfree(pQuery->pFilterInfo);
tfree(pQuery->colList);
tfree(pExprs);
tfree(pQInfo);
return NULL;
}
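/*
 * SQInfo lifetime is managed by a reference count: the query thread and every outstanding retrieve
 * request each hold one reference (see vnodeAddRefCount/vnodeDecRefCount below). Killing a query
 * only sets the kill flag and drops one reference; the structure itself is freed by
 * vnodeDecRefCount once the count reaches zero.
 */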
void vnodeFreeQInfoInQueue(void *param) {
SQInfo *pQInfo = (SQInfo *)param;
if (!vnodeIsQInfoValid(pQInfo)) return;
pQInfo->killed = 1;
dTrace("QInfo:%p set kill flag to free QInfo");
vnodeDecRefCount(pQInfo);
}
void vnodeFreeQInfo(void *param, bool decQueryRef) {
SQInfo *pQInfo = (SQInfo *)param;
if (!vnodeIsQInfoValid(param)) return;
pQInfo->killed = 1;
SMeterObj *pObj = pQInfo->pObj;
dTrace("QInfo:%p start to free SQInfo", pQInfo);
if (decQueryRef) {
vnodeDecMeterRefcnt(pQInfo);
}
SQuery *pQuery = &(pQInfo->query);
tclose(pQuery->hfd);
tclose(pQuery->dfd);
tclose(pQuery->lfd);
vnodeFreeFields(pQuery);
tfree(pQuery->pBlock);
for (int col = 0; col < pQuery->numOfOutputCols; ++col) {
tfree(pQuery->sdata[col]);
}
for (int col = 0; col < pQuery->numOfCols; ++col) {
vnodeFreeColumnInfo(&pQuery->colList[col].data);
}
if (pQuery->colList[0].colIdx != PRIMARYKEY_TIMESTAMP_COL_INDEX) {
tfree(pQuery->tsData);
}
sem_destroy(&(pQInfo->dataReady));
vnodeQueryFreeQInfoEx(pQInfo);
for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) {
SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[i];
if (pColFilter->numOfFilters > 0) {
tfree(pColFilter->pFilters);
}
}
tfree(pQuery->pFilterInfo);
tfree(pQuery->colList);
tfree(pQuery->sdata);
if (pQuery->pSelectExpr != NULL) {
for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) {
SSqlBinaryExprInfo *pBinExprInfo = &pQuery->pSelectExpr[i].pBinExprInfo;
if (pBinExprInfo->numOfCols > 0) {
tfree(pBinExprInfo->pReqColumns);
tSQLBinaryExprDestroy(&pBinExprInfo->pBinExpr, NULL);
}
}
tfree(pQuery->pSelectExpr);
}
if (pQuery->defaultVal != NULL) {
tfree(pQuery->defaultVal);
}
tfree(pQuery->pGroupbyExpr);
dTrace("QInfo:%p vid:%d sid:%d meterId:%s, QInfo is freed", pQInfo, pObj->vnode, pObj->sid, pObj->meterId);
// destroy the signature, in order to prevent the query process from passing the object safety check
memset(pQInfo, 0, sizeof(SQInfo));
tfree(pQInfo);
}
bool vnodeIsQInfoValid(void *param) {
SQInfo *pQInfo = (SQInfo *)param;
if (pQInfo == NULL) {
return false;
}
/*
* pQInfo->signature may be changed by another thread, so we assign value of signature
* into local variable, then compare by using local variable
*/
uint64_t sig = pQInfo->signature;
return (sig == (uint64_t)pQInfo);
}
void vnodeDecRefCount(void *param) {
SQInfo *pQInfo = (SQInfo*) param;
assert(vnodeIsQInfoValid(pQInfo));
int32_t ref = atomic_sub_fetch_32(&pQInfo->refCount, 1);
assert(ref >= 0);
dTrace("QInfo:%p decrease obj refcount, %d", pQInfo, ref);
if (ref == 0) {
vnodeFreeQInfo(pQInfo, true);
}
}
void vnodeAddRefCount(void *param) {
SQInfo *pQInfo = (SQInfo*) param;
assert(vnodeIsQInfoValid(pQInfo));
int32_t ref = atomic_add_fetch_32(&pQInfo->refCount, 1);
dTrace("QInfo:%p add refcount, %d", pQInfo, ref);
}
void vnodeQueryData(SSchedMsg *pMsg) {
SQuery *pQuery;
SQInfo *pQInfo;
pQInfo = (SQInfo *)pMsg->ahandle;
if (pQInfo->killed) {
dTrace("QInfo:%p it is already killed, abort", pQInfo);
vnodeDecRefCount(pQInfo);
return;
}
pQuery = &(pQInfo->query);
SMeterObj *pObj = pQInfo->pObj;
dTrace("QInfo:%p vid:%d sid:%d id:%s, query thread is created, numOfQueries:%d, func:%s", pQInfo, pObj->vnode,
pObj->sid, pObj->meterId, pObj->numOfQueries, __FUNCTION__);
pQuery->pointsToRead = vnodeList[pObj->vnode].cfg.rowsInFileBlock;
pQuery->pointsOffset = pQInfo->bufIndex * pQuery->pointsToRead;
int64_t st = taosGetTimestampUs();
while (1) {
int64_t potentNumOfRes = pQInfo->pointsRead + pQuery->pointsToRead;
/* limit the potential overflow data */
if (pQuery->limit.limit > 0 && potentNumOfRes > pQuery->limit.limit) {
pQuery->pointsToRead = pQuery->limit.limit - pQInfo->pointsRead;
if (pQuery->pointsToRead == 0) {
/* reach the limitation, abort */
pQuery->pointsRead = 0;
pQInfo->over = 1;
break;
}
}
pQInfo->code = (*pQInfo->fp)(pObj, pQuery); // <0:error
// has read at least one point
if (pQuery->pointsRead > 0 || pQInfo->code < 0) break;
if (pQuery->pointsRead == 0 && pQuery->over == 0) continue;
if (pQInfo->changed) {
pQInfo->over = 1;
break;
}
// has read all data in file, check data in cache
pQInfo->fp = pQueryFunc[pQuery->order.order ^ 1];
pQInfo->changed = 1;
pQuery->slot = -1; // reset the handle
pQuery->over = 0;
dTrace("vid:%d sid:%d id:%s, query in other media, order:%d, skey:%" PRId64 " query:%p", pObj->vnode, pObj->sid,
pObj->meterId, pQuery->order.order, pQuery->skey, pQuery);
}
pQInfo->pointsRead += pQuery->pointsRead;
dTrace("vid:%d sid:%d id:%s, %d points returned, totalRead:%d totalReturn:%d last key:%" PRId64 ", query:%p", pObj->vnode,
pObj->sid, pObj->meterId, pQuery->pointsRead, pQInfo->pointsRead, pQInfo->pointsReturned, pQuery->lastKey,
pQuery);
int64_t et = taosGetTimestampUs();
pQInfo->useconds += et - st;
// close FDs as soon as possible
if (pQInfo->over) {
dTrace("vid:%d sid:%d id:%s, query over, %d points are returned", pObj->vnode, pObj->sid, pObj->meterId,
pQInfo->pointsRead);
tclose(pQInfo->query.hfd);
tclose(pQInfo->query.dfd);
tclose(pQInfo->query.lfd);
}
sem_post(&pQInfo->dataReady);
vnodeDecRefCount(pQInfo);
}
void *vnodeQueryOnSingleTable(SMeterObj **pMetersObj, SSqlGroupbyExpr *pGroupbyExpr, SSqlFunctionExpr *pSqlExprs,
SQueryMeterMsg *pQueryMsg, int32_t *code) {
SQInfo *pQInfo;
SQuery *pQuery;
SMeterObj *pMeterObj = pMetersObj[0];
bool isProjQuery = vnodeIsProjectionQuery(pSqlExprs, pQueryMsg->numOfOutputCols);
// todo pass the correct error code
if (isProjQuery && pQueryMsg->tsLen == 0) {
pQInfo = vnodeAllocateQInfo(pQueryMsg, pMeterObj, pSqlExprs);
} else {
pQInfo = vnodeAllocateQInfoEx(pQueryMsg, pGroupbyExpr, pSqlExprs, pMetersObj[0]);
}
if (pQInfo == NULL) {
*code = TSDB_CODE_SERV_OUT_OF_MEMORY;
goto _error;
}
pQuery = &(pQInfo->query);
dTrace("qmsg:%p create QInfo:%p, QInfo created", pQueryMsg, pQInfo);
SMeterSidExtInfo** pSids = (SMeterSidExtInfo**)pQueryMsg->pSidExtInfo;
if (pSids != NULL && pSids[0]->key > 0) {
pQuery->skey = pSids[0]->key;
} else {
pQuery->skey = pQueryMsg->skey;
}
pQuery->ekey = pQueryMsg->ekey;
pQuery->lastKey = pQuery->skey;
pQInfo->fp = pQueryFunc[pQueryMsg->order];
if (sem_init(&(pQInfo->dataReady), 0, 0) != 0) {
dError("QInfo:%p vid:%d sid:%d meterId:%s, init dataReady sem failed, reason:%s", pQInfo, pMeterObj->vnode,
pMeterObj->sid, pMeterObj->meterId, strerror(errno));
*code = TSDB_CODE_APP_ERROR;
goto _error;
}
SSchedMsg schedMsg = {0};
if (isProjQuery && pQueryMsg->tsLen == 0) {
schedMsg.fp = vnodeQueryData;
} else {
if (vnodeParametersSafetyCheck(pQuery) == false) {
*code = TSDB_CODE_APP_ERROR;
goto _error;
}
STableQuerySupportObj *pSupporter = (STableQuerySupportObj *)calloc(1, sizeof(STableQuerySupportObj));
pSupporter->numOfMeters = 1;
pSupporter->pMetersHashTable = taosHashInit(pSupporter->numOfMeters, taosIntHash_32, false);
taosHashPut(pSupporter->pMetersHashTable, (const char*) &pMetersObj[0]->sid, sizeof(pMeterObj[0].sid),
(char *)&pMetersObj[0], POINTER_BYTES);
pSupporter->pSidSet = NULL;
pSupporter->subgroupIdx = -1;
pSupporter->pMeterSidExtInfo = NULL;
pQInfo->pTableQuerySupporter = pSupporter;
STSBuf *pTSBuf = NULL;
if (pQueryMsg->tsLen > 0) {
// open new file to save the result
char *tsBlock = (char *)pQueryMsg + pQueryMsg->tsOffset;
pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder);
tsBufResetPos(pTSBuf);
tsBufNextPos(pTSBuf);
}
if (((*code) = vnodeQueryTablePrepare(pQInfo, pQInfo->pObj, pSupporter, pTSBuf)) != TSDB_CODE_SUCCESS) {
goto _error;
}
if (pQInfo->over == 1) {
vnodeAddRefCount(pQInfo); // for retrieve procedure
return pQInfo;
}
schedMsg.fp = vnodeSingleTableQuery;
}
/*
* The reference count, which is 2, is for both the current query thread and the future retrieve request,
* which will always be issued by client to acquire data or free SQInfo struct.
*/
vnodeAddRefCount(pQInfo);
vnodeAddRefCount(pQInfo);
schedMsg.msg = NULL;
schedMsg.thandle = (void *)1;
schedMsg.ahandle = pQInfo;
dTrace("QInfo:%p set query flag and prepare runtime environment completed, ref:%d, wait for schedule", pQInfo,
pQInfo->refCount);
taosScheduleTask(tsQueryQhandle, &schedMsg);
return pQInfo;
_error:
// table query ref will be decreased during error handling
vnodeFreeQInfo(pQInfo, false);
return NULL;
}
/*
* query on multi-meters
*/
void *vnodeQueryOnMultiMeters(SMeterObj **pMetersObj, SSqlGroupbyExpr *pGroupbyExpr, SSqlFunctionExpr *pSqlExprs,
SQueryMeterMsg *pQueryMsg, int32_t *code) {
SQInfo *pQInfo;
SQuery *pQuery;
assert(QUERY_IS_STABLE_QUERY(pQueryMsg->queryType) && pQueryMsg->numOfCols > 0 && pQueryMsg->pSidExtInfo != 0 &&
pQueryMsg->numOfSids >= 1);
pQInfo = vnodeAllocateQInfoEx(pQueryMsg, pGroupbyExpr, pSqlExprs, *pMetersObj);
if (pQInfo == NULL) {
*code = TSDB_CODE_SERV_OUT_OF_MEMORY;
goto _error;
}
pQuery = &(pQInfo->query);
dTrace("qmsg:%p create QInfo:%p, QInfo created", pQueryMsg, pQInfo);
pQuery->skey = pQueryMsg->skey;
pQuery->ekey = pQueryMsg->ekey;
pQInfo->fp = pQueryFunc[pQueryMsg->order];
if (sem_init(&(pQInfo->dataReady), 0, 0) != 0) {
dError("QInfo:%p vid:%d sid:%d id:%s, init dataReady sem failed, reason:%s", pQInfo, pMetersObj[0]->vnode,
pMetersObj[0]->sid, pMetersObj[0]->meterId, strerror(errno));
*code = TSDB_CODE_APP_ERROR;
goto _error;
}
SSchedMsg schedMsg = {0};
STableQuerySupportObj *pSupporter = (STableQuerySupportObj *)calloc(1, sizeof(STableQuerySupportObj));
pSupporter->numOfMeters = pQueryMsg->numOfSids;
pSupporter->pMetersHashTable = taosHashInit(pSupporter->numOfMeters, taosIntHash_32, false);
for (int32_t i = 0; i < pSupporter->numOfMeters; ++i) {
taosHashPut(pSupporter->pMetersHashTable, (const char*) &pMetersObj[i]->sid, sizeof(pMetersObj[i]->sid), (char *)&pMetersObj[i],
POINTER_BYTES);
}
int32_t sidElemLen = pQueryMsg->tagLength + sizeof(SMeterSidExtInfo);
int32_t size = POINTER_BYTES * pQueryMsg->numOfSids + sidElemLen * pQueryMsg->numOfSids;
pSupporter->pMeterSidExtInfo = (SMeterSidExtInfo **)malloc(size);
if (pSupporter->pMeterSidExtInfo == NULL) {
*code = TSDB_CODE_SERV_OUT_OF_MEMORY;
dError("QInfo:%p failed to allocate memory for meterSid info, size:%d, abort", pQInfo, size);
goto _error;
}
char *px = ((char *)pSupporter->pMeterSidExtInfo) + POINTER_BYTES * pQueryMsg->numOfSids;
for (int32_t i = 0; i < pQueryMsg->numOfSids; ++i) {
SMeterSidExtInfo* pSrc = ((SMeterSidExtInfo **)pQueryMsg->pSidExtInfo)[i];
SMeterSidExtInfo* pDst = (SMeterSidExtInfo *)px;
pSupporter->pMeterSidExtInfo[i] = pDst;
pDst->sid = pSrc->sid;
pDst->uid = pSrc->uid;
pDst->key = pSrc->key;
if (pQueryMsg->tagLength > 0) {
memcpy(pDst->tags, pSrc->tags, pQueryMsg->tagLength);
}
px += sidElemLen;
}
if (pGroupbyExpr != NULL && pGroupbyExpr->numOfGroupCols > 0) {
pSupporter->pSidSet =
tSidSetCreate(pSupporter->pMeterSidExtInfo, pQueryMsg->numOfSids, (SSchema *)pQueryMsg->pTagSchema,
pQueryMsg->numOfTagsCols, pGroupbyExpr->columnInfo, pGroupbyExpr->numOfGroupCols);
} else {
pSupporter->pSidSet = tSidSetCreate(pSupporter->pMeterSidExtInfo, pQueryMsg->numOfSids,
(SSchema *)pQueryMsg->pTagSchema, pQueryMsg->numOfTagsCols, NULL, 0);
}
pQInfo->pTableQuerySupporter = pSupporter;
STSBuf *pTSBuf = NULL;
if (pQueryMsg->tsLen > 0) {
// open new file to save the result
char *tsBlock = (char *)pQueryMsg + pQueryMsg->tsOffset;
pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder);
tsBufResetPos(pTSBuf);
}
if (((*code) = vnodeSTableQueryPrepare(pQInfo, pQuery, pTSBuf)) != TSDB_CODE_SUCCESS) {
goto _error;
}
vnodeAddRefCount(pQInfo);
if (pQInfo->over == 1) {
return pQInfo;
}
vnodeAddRefCount(pQInfo);
schedMsg.msg = NULL;
schedMsg.thandle = (void *)1;
schedMsg.ahandle = pQInfo;
schedMsg.fp = vnodeMultiMeterQuery;
dTrace("QInfo:%p set query flag and prepare runtime environment completed, wait for schedule", pQInfo);
taosScheduleTask(tsQueryQhandle, &schedMsg);
return pQInfo;
_error:
// the table query reference will be decreased during error handling
vnodeFreeQInfo(pQInfo, false);
return NULL;
}
/* the engine provides the storage; the application must save the data before the next
retrieve call. *numOfRows is the number of points retrieved and *rowSize is the size of
each row.
*/
int vnodeRetrieveQueryInfo(void *handle, int *numOfRows, int *rowSize, int16_t *timePrec) {
SQInfo *pQInfo;
SQuery *pQuery;
*numOfRows = 0;
*rowSize = 0;
pQInfo = (SQInfo *)handle;
if (pQInfo == NULL) {
return TSDB_CODE_INVALID_QHANDLE;
}
pQuery = &(pQInfo->query);
if (!vnodeIsQInfoValid(pQInfo) || (pQuery->sdata == NULL)) {
dError("QInfo:%p %p retrieve memory is corrupted!!! QInfo:%p, sign:%p, sdata:%p", pQInfo, pQuery, pQInfo,
pQInfo->signature, pQuery->sdata);
return TSDB_CODE_INVALID_QHANDLE;
}
if (pQInfo->killed) {
dTrace("QInfo:%p query is killed, %p, code:%d", pQInfo, pQuery, pQInfo->code);
if (pQInfo->code == TSDB_CODE_SUCCESS) {
return TSDB_CODE_QUERY_CANCELLED;
} else { // in case of not TSDB_CODE_SUCCESS, return the code to client
return abs(pQInfo->code);
}
}
sem_wait(&pQInfo->dataReady);
*numOfRows = pQInfo->pointsRead - pQInfo->pointsReturned;
*rowSize = pQuery->rowSize;
*timePrec = vnodeList[pQInfo->pObj->vnode].cfg.precision;
dTrace("QInfo:%p, retrieve data info completed, precision:%d, rowsize:%d, rows:%d, code:%d", pQInfo, *timePrec,
*rowSize, *numOfRows, pQInfo->code);
if (pQInfo->code < 0) { // a negative code means an error occurred
return -pQInfo->code;
}
return TSDB_CODE_SUCCESS;
}
// vnodeRetrieveQueryInfo must be called first
int vnodeSaveQueryResult(void *handle, char *data, int32_t *size) {
SQInfo *pQInfo = (SQInfo *)handle;
// the remaining number of retrieved rows, not the interpolated result
int numOfRows = pQInfo->pointsRead - pQInfo->pointsReturned;
int32_t numOfFinal = vnodeCopyQueryResultToMsg(pQInfo, data, numOfRows);
pQInfo->pointsReturned += numOfFinal;
dTrace("QInfo:%p %d are returned, totalReturned:%d totalRead:%d", pQInfo, numOfFinal, pQInfo->pointsReturned,
pQInfo->pointsRead);
if (pQInfo->over == 0) {
#ifdef _TD_ARM_
dTrace("QInfo:%p set query flag, sig:%" PRIu64 ", func:vnodeSaveQueryResult", pQInfo, pQInfo->signature);
#else
dTrace("QInfo:%p set query flag, sig:%" PRIu64 ", func:%s", pQInfo, pQInfo->signature, __FUNCTION__);
#endif
if (pQInfo->killed == 1) {
dTrace("%p freed or killed, abort query", pQInfo);
} else {
vnodeAddRefCount(pQInfo);
dTrace("%p add query into task queue for schedule", pQInfo);
SSchedMsg schedMsg = {0};
if (pQInfo->pTableQuerySupporter != NULL) {
if (pQInfo->pTableQuerySupporter->pSidSet == NULL) {
schedMsg.fp = vnodeSingleTableQuery;
} else { // group by tag
schedMsg.fp = vnodeMultiMeterQuery;
}
} else {
pQInfo->bufIndex = pQInfo->bufIndex ^ 1; // exchange between 0 and 1
schedMsg.fp = vnodeQueryData;
}
schedMsg.msg = NULL;
schedMsg.thandle = (void *)1;
schedMsg.ahandle = pQInfo;
taosScheduleTask(tsQueryQhandle, &schedMsg);
}
}
return numOfFinal;
}
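// sanity check of the numeric fields in the query message after byte-order conversion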
static int32_t validateQueryMeterMsg(SQueryMeterMsg *pQueryMsg) {
if (pQueryMsg->intervalTime < 0) {
dError("qmsg:%p illegal value of aggTimeInterval %" PRId64 "", pQueryMsg, pQueryMsg->intervalTime);
return -1;
}
if (pQueryMsg->numOfTagsCols < 0 || pQueryMsg->numOfTagsCols > TSDB_MAX_TAGS + 1) {
dError("qmsg:%p illegal value of numOfTagsCols %d", pQueryMsg, pQueryMsg->numOfTagsCols);
return -1;
}
if (pQueryMsg->numOfCols <= 0 || pQueryMsg->numOfCols > TSDB_MAX_COLUMNS) {
dError("qmsg:%p illegal value of numOfCols %d", pQueryMsg, pQueryMsg->numOfCols);
return -1;
}
if (pQueryMsg->numOfSids <= 0) {
dError("qmsg:%p illegal value of numOfSids %d", pQueryMsg, pQueryMsg->numOfSids);
return -1;
}
if (pQueryMsg->numOfGroupCols < 0) {
dError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryMsg, pQueryMsg->numOfGroupCols);
return -1;
}
if (pQueryMsg->numOfOutputCols > TSDB_MAX_COLUMNS || pQueryMsg->numOfOutputCols <= 0) {
dError("qmsg:%p illegal value of output columns %d", pQueryMsg, pQueryMsg->numOfOutputCols);
return -1;
}
if (pQueryMsg->tagLength < 0) {
dError("qmsg:%p illegal value of tag length %d", pQueryMsg, pQueryMsg->tagLength);
return -1;
}
return 0;
}
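/*
 * Convert the query message from network byte order to host byte order and rebuild the
 * embedded variable-length parts: column filters, function expressions, sid list, tag
 * schema, group by columns, and interpolation values.
 */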
int32_t vnodeConvertQueryMeterMsg(SQueryMeterMsg *pQueryMsg) {
pQueryMsg->vnode = htons(pQueryMsg->vnode);
pQueryMsg->numOfSids = htonl(pQueryMsg->numOfSids);
#ifdef TSKEY32
pQueryMsg->skey = htonl(pQueryMsg->skey);
pQueryMsg->ekey = htonl(pQueryMsg->ekey);
#else
pQueryMsg->skey = htobe64(pQueryMsg->skey);
pQueryMsg->ekey = htobe64(pQueryMsg->ekey);
#endif
pQueryMsg->order = htons(pQueryMsg->order);
pQueryMsg->orderColId = htons(pQueryMsg->orderColId);
pQueryMsg->queryType = htons(pQueryMsg->queryType);
pQueryMsg->intervalTime = htobe64(pQueryMsg->intervalTime);
pQueryMsg->slidingTime = htobe64(pQueryMsg->slidingTime);
pQueryMsg->numOfTagsCols = htons(pQueryMsg->numOfTagsCols);
pQueryMsg->numOfCols = htons(pQueryMsg->numOfCols);
pQueryMsg->numOfOutputCols = htons(pQueryMsg->numOfOutputCols);
pQueryMsg->numOfGroupCols = htons(pQueryMsg->numOfGroupCols);
pQueryMsg->tagLength = htons(pQueryMsg->tagLength);
pQueryMsg->limit = htobe64(pQueryMsg->limit);
pQueryMsg->offset = htobe64(pQueryMsg->offset);
pQueryMsg->tsOffset = htonl(pQueryMsg->tsOffset);
pQueryMsg->tsLen = htonl(pQueryMsg->tsLen);
pQueryMsg->tsNumOfBlocks = htonl(pQueryMsg->tsNumOfBlocks);
pQueryMsg->tsOrder = htonl(pQueryMsg->tsOrder);
// query msg safety check
if (validateQueryMeterMsg(pQueryMsg) != 0) {
return TSDB_CODE_INVALID_QUERY_MSG;
}
SMeterSidExtInfo **pSids = NULL;
char * pMsg = (char *)(pQueryMsg->colList) + sizeof(SColumnInfo) * pQueryMsg->numOfCols;
for (int32_t col = 0; col < pQueryMsg->numOfCols; ++col) {
pQueryMsg->colList[col].colId = htons(pQueryMsg->colList[col].colId);
pQueryMsg->colList[col].type = htons(pQueryMsg->colList[col].type);
pQueryMsg->colList[col].bytes = htons(pQueryMsg->colList[col].bytes);
pQueryMsg->colList[col].numOfFilters = htons(pQueryMsg->colList[col].numOfFilters);
assert(pQueryMsg->colList[col].type >= TSDB_DATA_TYPE_BOOL && pQueryMsg->colList[col].type <= TSDB_DATA_TYPE_NCHAR);
int32_t numOfFilters = pQueryMsg->colList[col].numOfFilters;
if (numOfFilters > 0) {
pQueryMsg->colList[col].filters = calloc(numOfFilters, sizeof(SColumnFilterInfo));
}
for (int32_t f = 0; f < numOfFilters; ++f) {
SColumnFilterInfo *pFilterInfo = (SColumnFilterInfo *)pMsg;
SColumnFilterInfo *pDestFilterInfo = &pQueryMsg->colList[col].filters[f];
pDestFilterInfo->filterOnBinary = htons(pFilterInfo->filterOnBinary);
pMsg += sizeof(SColumnFilterInfo);
if (pDestFilterInfo->filterOnBinary) {
pDestFilterInfo->len = htobe64(pFilterInfo->len);
pDestFilterInfo->pz = (int64_t)calloc(1, pDestFilterInfo->len + 1);
memcpy((void*)pDestFilterInfo->pz, pMsg, pDestFilterInfo->len + 1);
pMsg += (pDestFilterInfo->len + 1);
} else {
pDestFilterInfo->lowerBndi = htobe64(pFilterInfo->lowerBndi);
pDestFilterInfo->upperBndi = htobe64(pFilterInfo->upperBndi);
}
pDestFilterInfo->lowerRelOptr = htons(pFilterInfo->lowerRelOptr);
pDestFilterInfo->upperRelOptr = htons(pFilterInfo->upperRelOptr);
}
}
bool hasArithmeticFunction = false;
/*
* 1. for a simple projection query on meters, only the pSqlFuncExprs[i].colIdx value is recorded
* 2. for complex queries, the whole SqlExprs object is required.
*/
pQueryMsg->pSqlFuncExprs = (int64_t)malloc(POINTER_BYTES * pQueryMsg->numOfOutputCols);
SSqlFuncExprMsg *pExprMsg = (SSqlFuncExprMsg *)pMsg;
for (int32_t i = 0; i < pQueryMsg->numOfOutputCols; ++i) {
((SSqlFuncExprMsg **)pQueryMsg->pSqlFuncExprs)[i] = pExprMsg;
pExprMsg->colInfo.colIdx = htons(pExprMsg->colInfo.colIdx);
pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId);
pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag);
pExprMsg->functionId = htons(pExprMsg->functionId);
pExprMsg->numOfParams = htons(pExprMsg->numOfParams);
pMsg += sizeof(SSqlFuncExprMsg);
for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) {
pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType);
pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes);
if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) {
pExprMsg->arg[j].argValue.pz = pMsg;
pMsg += pExprMsg->arg[j].argBytes + 1; // one extra byte for the string terminator
} else {
pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64);
}
}
if (pExprMsg->functionId == TSDB_FUNC_ARITHM) {
hasArithmeticFunction = true;
} else if (pExprMsg->functionId == TSDB_FUNC_TAG ||
pExprMsg->functionId == TSDB_FUNC_TAGPRJ ||
pExprMsg->functionId == TSDB_FUNC_TAG_DUMMY) {
if (pExprMsg->colInfo.flag != TSDB_COL_TAG) { // tag functions must be applied to a tag column
return TSDB_CODE_INVALID_QUERY_MSG;
}
} else {
if (!vnodeValidateExprColumnInfo(pQueryMsg, pExprMsg)) {
return TSDB_CODE_INVALID_QUERY_MSG;
}
}
pExprMsg = (SSqlFuncExprMsg *)pMsg;
}
pQueryMsg->colNameLen = htonl(pQueryMsg->colNameLen);
if (hasArithmeticFunction) { // column name array
assert(pQueryMsg->colNameLen > 0);
pQueryMsg->colNameList = (int64_t)pMsg;
pMsg += pQueryMsg->colNameLen;
}
pSids = (SMeterSidExtInfo **)calloc(pQueryMsg->numOfSids, sizeof(SMeterSidExtInfo *));
pQueryMsg->pSidExtInfo = (uint64_t)pSids;
pSids[0] = (SMeterSidExtInfo *)pMsg;
pSids[0]->sid = htonl(pSids[0]->sid);
pSids[0]->uid = htobe64(pSids[0]->uid);
pSids[0]->key = htobe64(pSids[0]->key);
for (int32_t j = 1; j < pQueryMsg->numOfSids; ++j) {
pSids[j] = (SMeterSidExtInfo *)((char *)pSids[j - 1] + sizeof(SMeterSidExtInfo) + pQueryMsg->tagLength);
pSids[j]->sid = htonl(pSids[j]->sid);
pSids[j]->uid = htobe64(pSids[j]->uid);
pSids[j]->key = htobe64(pSids[j]->key);
}
pMsg = (char *)pSids[pQueryMsg->numOfSids - 1];
pMsg += sizeof(SMeterSidExtInfo) + pQueryMsg->tagLength;
if (pQueryMsg->numOfGroupCols > 0 || pQueryMsg->numOfTagsCols > 0) { // group by tag columns
pQueryMsg->pTagSchema = (uint64_t)pMsg;
SSchema *pTagSchema = (SSchema *)pQueryMsg->pTagSchema;
pMsg += sizeof(SSchema) * pQueryMsg->numOfTagsCols;
if (pQueryMsg->numOfGroupCols > 0) {
pQueryMsg->groupbyTagIds = (uint64_t) & (pTagSchema[pQueryMsg->numOfTagsCols]);
} else {
pQueryMsg->groupbyTagIds = 0;
}
pQueryMsg->orderByIdx = htons(pQueryMsg->orderByIdx);
pQueryMsg->orderType = htons(pQueryMsg->orderType);
pMsg += sizeof(SColIndexEx) * pQueryMsg->numOfGroupCols;
} else {
pQueryMsg->pTagSchema = 0;
pQueryMsg->groupbyTagIds = 0;
}
pQueryMsg->interpoType = htons(pQueryMsg->interpoType);
if (pQueryMsg->interpoType != TSDB_INTERPO_NONE) {
pQueryMsg->defaultVal = (uint64_t)(pMsg);
int64_t *v = (int64_t *)pMsg;
for (int32_t i = 0; i < pQueryMsg->numOfOutputCols; ++i) {
v[i] = htobe64(v[i]);
}
}
dTrace("qmsg:%p query on %d meter(s), qrange:%" PRId64 "-%" PRId64 ", numOfGroupbyTagCols:%d, numOfTagCols:%d, timestamp order:%d, "
"tags order:%d, tags order col:%d, numOfOutputCols:%d, numOfCols:%d, interval:%" PRId64 ", fillType:%d, comptslen:%d, limit:%" PRId64 ", "
"offset:%" PRId64,
pQueryMsg, pQueryMsg->numOfSids, pQueryMsg->skey, pQueryMsg->ekey, pQueryMsg->numOfGroupCols,
pQueryMsg->numOfTagsCols, pQueryMsg->order, pQueryMsg->orderType, pQueryMsg->orderByIdx,
pQueryMsg->numOfOutputCols, pQueryMsg->numOfCols, pQueryMsg->intervalTime, pQueryMsg->interpoType,
pQueryMsg->tsLen, pQueryMsg->limit, pQueryMsg->offset);
return 0;
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "dnodeSystem.h"
#include "trpc.h"
#include "ttime.h"
#include "vnode.h"
#include "vnodeStore.h"
#include "vnodeUtil.h"
#include "vnodeStatus.h"
int tsMaxVnode = -1;
int tsOpenVnodes = 0;
SVnodeObj *vnodeList = NULL;
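// initialize the storage of one vnode: meter objects, cache pool, data files and commit module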
static int vnodeInitStoreVnode(int vnode) {
SVnodeObj *pVnode = vnodeList + vnode;
pVnode->vnode = vnode;
vnodeOpenMetersVnode(vnode);
if (pVnode->cfg.maxSessions <= 0) {
return TSDB_CODE_SUCCESS;
}
pVnode->firstKey = taosGetTimestamp(pVnode->cfg.precision);
pVnode->pCachePool = vnodeOpenCachePool(vnode);
if (pVnode->pCachePool == NULL) {
dError("vid:%d, cache pool init failed.", pVnode->vnode);
return TSDB_CODE_SERV_OUT_OF_MEMORY;
}
if (vnodeInitFile(vnode) != TSDB_CODE_SUCCESS) {
dError("vid:%d, files init failed.", pVnode->vnode);
return TSDB_CODE_VG_INIT_FAILED;
}
if (vnodeInitCommit(vnode) != TSDB_CODE_SUCCESS) {
dError("vid:%d, commit init failed.", pVnode->vnode);
return TSDB_CODE_VG_INIT_FAILED;
}
pthread_mutex_init(&(pVnode->vmutex), NULL);
dPrint("vid:%d, storage initialized, version:%" PRIu64 " fileId:%d numOfFiles:%d", vnode, pVnode->version, pVnode->fileId,
pVnode->numOfFiles);
return TSDB_CODE_SUCCESS;
}
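// open one vnode for service: shell and peer connections, then update the open-vnode counters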
int vnodeOpenVnode(int vnode) {
int32_t code = TSDB_CODE_SUCCESS;
SVnodeObj *pVnode = vnodeList + vnode;
pVnode->vnode = vnode;
pVnode->accessState = TSDB_VN_ALL_ACCCESS;
// vnode is empty
if (pVnode->cfg.maxSessions <= 0) {
return TSDB_CODE_SUCCESS;
}
if (!(pVnode->vnodeStatus == TSDB_VN_STATUS_OFFLINE || pVnode->vnodeStatus == TSDB_VN_STATUS_CREATING)) {
dError("vid:%d, status:%s, cannot enter open operation", vnode, taosGetVnodeStatusStr(pVnode->vnodeStatus));
return TSDB_CODE_INVALID_VNODE_STATUS;
}
dPrint("vid:%d, status:%s, start to open", vnode, taosGetVnodeStatusStr(pVnode->vnodeStatus));
pthread_mutex_lock(&dmutex);
// open the shell vnode; abort on failure (e.g., not enough memory)
if ((code = vnodeOpenShellVnode(vnode)) != TSDB_CODE_SUCCESS) {
pthread_mutex_unlock(&dmutex);
return code;
}
vnodeOpenPeerVnode(vnode);
if (vnode > tsMaxVnode) tsMaxVnode = vnode;
vnodeCalcOpenVnodes();
pthread_mutex_unlock(&dmutex);
#ifndef CLUSTER
vnodeOpenStreams(pVnode, NULL);
#endif
dPrint("vid:%d, vnode is opened, openVnodes:%d, status:%s", vnode, tsOpenVnodes, taosGetVnodeStatusStr(pVnode->vnodeStatus));
return TSDB_CODE_SUCCESS;
}
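// mark every meter in the vnode as dropped; report TSDB_CODE_ACTION_IN_PROGRESS if any meter is not yet safe to delete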
static int32_t vnodeMarkAllMetersDropped(SVnodeObj* pVnode) {
if (pVnode->meterList == NULL) {
return TSDB_CODE_SUCCESS;
}
bool ready = true;
for (int sid = 0; sid < pVnode->cfg.maxSessions; ++sid) {
if (!vnodeIsSafeToDeleteMeter(pVnode, sid)) {
ready = false;
} else { // mark the meter as dropped
SMeterObj* pObj = pVnode->meterList[sid];
if (pObj != NULL) {
pObj->state = TSDB_METER_STATE_DROPPED;
}
}
}
return ready? TSDB_CODE_SUCCESS:TSDB_CODE_ACTION_IN_PROGRESS;
}
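// close all modules of a vnode (stream, commit, peer, meters, shell, cache) while holding dmutex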
static int vnodeCloseVnode(int vnode) {
if (vnodeList == NULL) return TSDB_CODE_SUCCESS;
SVnodeObj* pVnode = &vnodeList[vnode];
pthread_mutex_lock(&dmutex);
if (pVnode->cfg.maxSessions == 0) {
pthread_mutex_unlock(&dmutex);
return TSDB_CODE_SUCCESS;
}
if (pVnode->vnodeStatus == TSDB_VN_STATUS_DELETING) {
dPrint("vid:%d, status:%s, another thread performed delete operation", vnode, taosGetVnodeStatusStr(pVnode->vnodeStatus));
return TSDB_CODE_SUCCESS;
} else {
dPrint("vid:%d, status:%s, enter close operation", vnode, taosGetVnodeStatusStr(pVnode->vnodeStatus));
pVnode->vnodeStatus = TSDB_VN_STATUS_CLOSING;
}
// mark all meters in this vnode as dropped
if (vnodeMarkAllMetersDropped(pVnode) != TSDB_CODE_SUCCESS) {
pthread_mutex_unlock(&dmutex);
return TSDB_CODE_ACTION_IN_PROGRESS;
}
dPrint("vid:%d, status:%s, enter delete operation", vnode, taosGetVnodeStatusStr(pVnode->vnodeStatus));
pVnode->vnodeStatus = TSDB_VN_STATUS_DELETING;
vnodeCloseStream(vnodeList + vnode);
vnodeCancelCommit(vnodeList + vnode);
vnodeClosePeerVnode(vnode);
vnodeCloseMetersVnode(vnode);
vnodeCloseShellVnode(vnode);
vnodeCloseCachePool(vnode);
vnodeCleanUpCommit(vnode);
pthread_mutex_destroy(&(vnodeList[vnode].vmutex));
if (tsMaxVnode == vnode) tsMaxVnode = vnode - 1;
tfree(vnodeList[vnode].meterIndex);
pthread_mutex_unlock(&dmutex);
return TSDB_CODE_SUCCESS;
}
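// create the vnode directories and the meter object file, save the configuration, then initialize and open the vnode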
int vnodeCreateVnode(int vnode, SVnodeCfg *pCfg, SVPeerDesc *pDesc) {
char fileName[128];
if (vnodeList[vnode].vnodeStatus != TSDB_VN_STATUS_OFFLINE) {
dError("vid:%d, status:%s, cannot enter create operation", vnode, taosGetVnodeStatusStr(vnodeList[vnode].vnodeStatus));
return TSDB_CODE_INVALID_VNODE_STATUS;
}
vnodeList[vnode].vnodeStatus = TSDB_VN_STATUS_CREATING;
sprintf(fileName, "%s/vnode%d", tsDirectory, vnode);
if (mkdir(fileName, 0755) != 0) {
dError("failed to create vnode:%d directory:%s, errno:%d, reason:%s", vnode, fileName, errno, strerror(errno));
if (errno == EACCES) {
return TSDB_CODE_NO_DISK_PERMISSIONS;
} else if (errno == ENOSPC) {
return TSDB_CODE_SERV_NO_DISKSPACE;
} else if (errno == EEXIST) {
} else {
return TSDB_CODE_VG_INIT_FAILED;
}
}
sprintf(fileName, "%s/vnode%d/db", tsDirectory, vnode);
if (mkdir(fileName, 0755) != 0) {
dError("failed to create vnode:%d directory:%s, errno:%d, reason:%s", vnode, fileName, errno, strerror(errno));
if (errno == EACCES) {
return TSDB_CODE_NO_DISK_PERMISSIONS;
} else if (errno == ENOSPC) {
return TSDB_CODE_SERV_NO_DISKSPACE;
} else if (errno == EEXIST) {
} else {
return TSDB_CODE_VG_INIT_FAILED;
}
}
vnodeList[vnode].cfg = *pCfg;
int code = vnodeCreateMeterObjFile(vnode);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
code = vnodeSaveVnodeCfg(vnode, pCfg, pDesc);
if (code != TSDB_CODE_SUCCESS) {
return TSDB_CODE_VG_INIT_FAILED;
}
code = vnodeInitStoreVnode(vnode);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
return vnodeOpenVnode(vnode);
}
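// remove the .head/.data/.last files (following the symbolic links) and the vnode directories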
static void vnodeRemoveDataFiles(int vnode) {
char vnodeDir[TSDB_FILENAME_LEN];
char dfilePath[TSDB_FILENAME_LEN];
char linkFile[TSDB_FILENAME_LEN];
struct dirent *de = NULL;
DIR * dir = NULL;
sprintf(vnodeDir, "%s/vnode%d/db", tsDirectory, vnode);
dir = opendir(vnodeDir);
if (dir == NULL) return;
while ((de = readdir(dir)) != NULL) {
if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) continue;
if ((strcmp(de->d_name + strlen(de->d_name) - strlen(".head"), ".head") == 0 ||
strcmp(de->d_name + strlen(de->d_name) - strlen(".data"), ".data") == 0 ||
strcmp(de->d_name + strlen(de->d_name) - strlen(".last"), ".last") == 0) &&
(de->d_type & DT_LNK)) {
sprintf(linkFile, "%s/%s", vnodeDir, de->d_name);
if (!vnodeRemoveDataFileFromLinkFile(linkFile, de->d_name)) {
continue;
}
memset(dfilePath, 0, TSDB_FILENAME_LEN);
int tcode = readlink(linkFile, dfilePath, TSDB_FILENAME_LEN);
remove(linkFile);
if (tcode >= 0) {
remove(dfilePath);
dPrint("Data file %s is removed, link file %s", dfilePath, linkFile);
}
} else {
remove(de->d_name);
}
}
closedir(dir);
rmdir(vnodeDir);
sprintf(vnodeDir, "%s/vnode%d/meterObj.v%d", tsDirectory, vnode, vnode);
remove(vnodeDir);
sprintf(vnodeDir, "%s/vnode%d", tsDirectory, vnode);
rmdir(vnodeDir);
dPrint("vid:%d, vnode is removed, status:%s", vnode, taosGetVnodeStatusStr(vnodeList[vnode].vnodeStatus));
}
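// close the vnode if its status allows it, then remove all of its data files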
int vnodeRemoveVnode(int vnode) {
if (vnodeList == NULL) return TSDB_CODE_SUCCESS;
if (vnodeList[vnode].cfg.maxSessions > 0) {
SVnodeObj* pVnode = &vnodeList[vnode];
if (pVnode->vnodeStatus == TSDB_VN_STATUS_CREATING
|| pVnode->vnodeStatus == TSDB_VN_STATUS_OFFLINE
|| pVnode->vnodeStatus == TSDB_VN_STATUS_DELETING) {
dTrace("vid:%d, status:%s, cannot enter close/delete operation", vnode, taosGetVnodeStatusStr(pVnode->vnodeStatus));
return TSDB_CODE_ACTION_IN_PROGRESS;
} else {
int32_t ret = vnodeCloseVnode(vnode);
if (ret != TSDB_CODE_SUCCESS) {
return ret;
}
dTrace("vid:%d, status:%s, do delete operation", vnode, taosGetVnodeStatusStr(pVnode->vnodeStatus));
vnodeRemoveDataFiles(vnode);
}
} else {
dPrint("vid:%d, max sessions:%d, this vnode already dropped!!!", vnode, vnodeList[vnode].cfg.maxSessions);
vnodeList[vnode].cfg.maxSessions = 0; //reset value
vnodeCalcOpenVnodes();
}
return TSDB_CODE_SUCCESS;
}
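// allocate the global vnode list and initialize the storage of every vnode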
int vnodeInitStore() {
int vnode;
int size;
size = sizeof(SVnodeObj) * TSDB_MAX_VNODES;
vnodeList = (SVnodeObj *)malloc(size);
if (vnodeList == NULL) return -1;
memset(vnodeList, 0, size);
if (vnodeInitInfo() < 0) return -1;
for (vnode = 0; vnode < TSDB_MAX_VNODES; ++vnode) {
int code = vnodeInitStoreVnode(vnode);
if (code != TSDB_CODE_SUCCESS) {
// this vnode failed to recover from the commit log, abort the initialization
return -1;
}
}
return 0;
}
int vnodeInitVnodes() {
int vnode;
for (vnode = 0; vnode < TSDB_MAX_VNODES; ++vnode) {
if (vnodeOpenVnode(vnode) < 0) return -1;
}
return 0;
}
void vnodeCleanUpOneVnode(int vnode) {
static int again = 0;
if (vnodeList == NULL) return;
pthread_mutex_lock(&dmutex);
if (again) {
pthread_mutex_unlock(&dmutex);
return;
}
again = 1;
if (vnodeList[vnode].pCachePool) {
vnodeList[vnode].vnodeStatus = TSDB_VN_STATUS_OFFLINE;
vnodeClosePeerVnode(vnode);
}
pthread_mutex_unlock(&dmutex);
if (vnodeList[vnode].pCachePool) {
vnodeProcessCommitTimer(vnodeList + vnode, NULL);
while (vnodeList[vnode].commitThread != 0) {
taosMsleep(10);
}
vnodeCleanUpCommit(vnode);
}
}
void vnodeCleanUpVnodes() {
static int again = 0;
if (vnodeList == NULL) return;
pthread_mutex_lock(&dmutex);
if (again) {
pthread_mutex_unlock(&dmutex);
return;
}
again = 1;
for (int vnode = 0; vnode < TSDB_MAX_VNODES; ++vnode) {
if (vnodeList[vnode].pCachePool) {
vnodeList[vnode].vnodeStatus = TSDB_VN_STATUS_OFFLINE;
vnodeClosePeerVnode(vnode);
}
}
pthread_mutex_unlock(&dmutex);
for (int vnode = 0; vnode < TSDB_MAX_VNODES; ++vnode) {
if (vnodeList[vnode].pCachePool) {
vnodeProcessCommitTimer(vnodeList + vnode, NULL);
while (vnodeList[vnode].commitThread != 0) {
taosMsleep(10);
}
vnodeCleanUpCommit(vnode);
}
}
}
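// recount the number of configured (non-empty) vnodes and publish it atomically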
void vnodeCalcOpenVnodes() {
int openVnodes = 0;
for (int vnode = 0; vnode <= tsMaxVnode; ++vnode) {
if (vnodeList[vnode].cfg.maxSessions <= 0) continue;
openVnodes++;
}
atomic_store_32(&tsOpenVnodes, openVnodes);
}
void vnodeUpdateHeadFile(int vnode, int oldTables, int newTables) {
//todo rewrite the head file with newTables
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include <stdbool.h>
int vnodeInitInfo() { return 0; }
bool vnodeRemoveDataFileFromLinkFile(char* linkFile, char* de_name) { return true; }
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "taosmsg.h"
#include "vnode.h"
#include "vnodeUtil.h"
#include "vnodeStatus.h"
/* static TAOS *dbConn = NULL; */
void vnodeCloseStreamCallback(void *param);
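// stream (continuous query) result callback: pack one result row into an SSubmitMsg and insert it back into the meter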
void vnodeProcessStreamRes(void *param, TAOS_RES *tres, TAOS_ROW row) {
SMeterObj *pObj = (SMeterObj *)param;
dTrace("vid:%d sid:%d id:%s, stream result is ready", pObj->vnode, pObj->sid, pObj->meterId);
// construct data
int32_t contLen = pObj->bytesPerPoint;
char * pTemp = calloc(1, sizeof(SSubmitMsg) + pObj->bytesPerPoint + sizeof(SVMsgHeader));
SSubmitMsg *pMsg = (SSubmitMsg *)(pTemp + sizeof(SVMsgHeader));
pMsg->numOfRows = htons(1);
char ncharBuf[TSDB_MAX_BYTES_PER_ROW] = {0};
int32_t offset = 0;
for (int32_t i = 0; i < pObj->numOfColumns; ++i) {
char *dst = row[i];
if (dst == NULL) {
setNull(pMsg->payLoad + offset, pObj->schema[i].type, pObj->schema[i].bytes);
} else {
// here, we need to transfer nchar(utf8) to unicode(ucs-4)
if (pObj->schema[i].type == TSDB_DATA_TYPE_NCHAR) {
taosMbsToUcs4(row[i], pObj->schema[i].bytes, ncharBuf, TSDB_MAX_BYTES_PER_ROW);
dst = ncharBuf;
}
memcpy(pMsg->payLoad + offset, dst, pObj->schema[i].bytes);
}
offset += pObj->schema[i].bytes;
}
contLen += sizeof(SSubmitMsg);
int32_t numOfPoints = 0;
int32_t code = vnodeInsertPoints(pObj, (char *)pMsg, contLen, TSDB_DATA_SOURCE_SHELL, NULL, pObj->sversion,
&numOfPoints, taosGetTimestamp(vnodeList[pObj->vnode].cfg.precision));
if (code != TSDB_CODE_SUCCESS) {
dError("vid:%d sid:%d id:%s, failed to insert continuous query results", pObj->vnode, pObj->sid, pObj->meterId);
}
assert(numOfPoints >= 0 && numOfPoints <= 1);
tfree(pTemp);
}
static void vnodeGetDBFromMeterId(SMeterObj *pObj, char *db) {
char *st = strstr(pObj->meterId, ".");
char *end = strstr(st + 1, ".");
memcpy(db, st + 1, end - (st + 1));
}
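// (re)open the continuous-query streams of all meters in a vnode; retry via timer if the connection to mgmt is not ready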
void vnodeOpenStreams(void *param, void *tmrId) {
SVnodeObj *pVnode = (SVnodeObj *)param;
SMeterObj *pObj;
if (pVnode->streamRole == TSDB_VN_STREAM_STATUS_STOP) return;
if (pVnode->meterList == NULL) return;
taosTmrStopA(&pVnode->streamTimer);
pVnode->streamTimer = NULL;
for (int sid = 0; sid < pVnode->cfg.maxSessions; ++sid) {
pObj = pVnode->meterList[sid];
if (pObj == NULL || pObj->sqlLen == 0 || vnodeIsMeterState(pObj, TSDB_METER_STATE_DROPPING)) continue;
dTrace("vid:%d sid:%d id:%s, open stream:%s", pObj->vnode, sid, pObj->meterId, pObj->pSql);
if (pVnode->dbConn == NULL) {
char db[64] = {0};
char user[64] = {0};
vnodeGetDBFromMeterId(pObj, db);
sprintf(user, "_%s", pVnode->cfg.acct);
pVnode->dbConn = taos_connect(NULL, user, tsInternalPass, db, 0);
}
if (pVnode->dbConn == NULL) {
dError("vid:%d, failed to connect to mgmt node", pVnode->vnode);
taosTmrReset(vnodeOpenStreams, 1000, param, vnodeTmrCtrl, &pVnode->streamTimer);
return;
}
if (pObj->pStream == NULL) {
pObj->pStream = taos_open_stream(pVnode->dbConn, pObj->pSql, vnodeProcessStreamRes, pObj->lastKey, pObj,
vnodeCloseStreamCallback);
if (pObj->pStream) pVnode->numOfStreams++;
}
}
}
void vnodeCreateStream(SMeterObj *pObj) {
if (pObj->sqlLen <= 0) return;
SVnodeObj *pVnode = vnodeList + pObj->vnode;
if (pVnode->streamRole == TSDB_VN_STREAM_STATUS_STOP) return;
if (pObj->pStream) return;
dTrace("vid:%d sid:%d id:%s stream:%s is created", pObj->vnode, pObj->sid, pObj->meterId, pObj->pSql);
if (pVnode->dbConn == NULL) {
if (pVnode->streamTimer == NULL) taosTmrReset(vnodeOpenStreams, 1000, pVnode, vnodeTmrCtrl, &pVnode->streamTimer);
} else {
pObj->pStream = taos_open_stream(pVnode->dbConn, pObj->pSql, vnodeProcessStreamRes, pObj->lastKey, pObj,
vnodeCloseStreamCallback);
if (pObj->pStream) pVnode->numOfStreams++;
}
}
// Close only one stream
void vnodeRemoveStream(SMeterObj *pObj) {
SVnodeObj *pVnode = vnodeList + pObj->vnode;
if (pObj->sqlLen <= 0) return;
if (pObj->pStream) {
taos_close_stream(pObj->pStream);
pVnode->numOfStreams--;
}
pObj->pStream = NULL;
if (pVnode->numOfStreams == 0) {
taos_close(pVnode->dbConn);
pVnode->dbConn = NULL;
}
dTrace("vid:%d sid:%d id:%d stream is removed", pObj->vnode, pObj->sid, pObj->meterId);
}
// Close all streams in a vnode
void vnodeCloseStream(SVnodeObj *pVnode) {
SMeterObj *pObj;
dPrint("vid:%d, stream is closed, old role %s", pVnode->vnode, taosGetVnodeStreamStatusStr(pVnode->streamRole));
// stop stream computing
for (int sid = 0; sid < pVnode->cfg.maxSessions; ++sid) {
pObj = pVnode->meterList[sid];
if (pObj == NULL) continue;
if (pObj->sqlLen > 0 && pObj->pStream) {
taos_close_stream(pObj->pStream);
pVnode->numOfStreams--;
}
pObj->pStream = NULL;
}
}
void vnodeUpdateStreamRole(SVnodeObj *pVnode) {
/* SMeterObj *pObj; */
int newRole = (pVnode->vnodeStatus == TSDB_VN_STATUS_MASTER) ? TSDB_VN_STREAM_STATUS_START : TSDB_VN_STREAM_STATUS_STOP;
if (newRole != pVnode->streamRole) {
dPrint("vid:%d, stream role is changed from %s to %s",
pVnode->vnode, taosGetVnodeStreamStatusStr(pVnode->streamRole), taosGetVnodeStreamStatusStr(newRole));
pVnode->streamRole = newRole;
if (newRole == TSDB_VN_STREAM_STATUS_START) {
vnodeOpenStreams(pVnode, NULL);
} else {
vnodeCloseStream(pVnode);
}
} else {
dPrint("vid:%d, stream role is keep to %s", pVnode->vnode, taosGetVnodeStreamStatusStr(pVnode->streamRole));
}
}
// Callback function called from client
void vnodeCloseStreamCallback(void *param) {
SMeterObj *pTable = (SMeterObj *)param;
SVnodeObj *pVnode = NULL;
if (pTable == NULL || pTable->sqlLen == 0) return;
pVnode = vnodeList + pTable->vnode;
pTable->sqlLen = 0;
pTable->pSql = NULL;
pTable->pStream = NULL;
pVnode->numOfStreams--;
if (pVnode->numOfStreams == 0) {
taos_close(pVnode->dbConn);
pVnode->dbConn = NULL;
}
vnodeSaveMeterObjToFile(pTable);
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "mnode.h"
#include "os.h"
#include "qast.h"
#include "qextbuffer.h"
#include "tschemautil.h"
#include "tsqlfunction.h"
typedef struct SSyntaxTreeFilterSupporter {
SSchema* pTagSchema;
int32_t numOfTags;
int32_t optr;
} SSyntaxTreeFilterSupporter;
typedef struct SJoinSupporter {
void** val;
void** pTabObjs;
int32_t size;
int16_t type;
int16_t colIndex;
void** qualMeterObj;
int32_t qualSize;
} SJoinSupporter;
typedef struct SMeterNameFilterSupporter {
SPatternCompareInfo info;
char* pattern;
} SMeterNameFilterSupporter;
static void tansformQueryResult(tQueryResultset* pRes);
static bool tSkipListNodeFilterCallback(const void *pNode, void *param);
static int32_t tabObjVGIDComparator(const void* pLeft, const void* pRight) {
STabObj* p1 = *(STabObj**)pLeft;
STabObj* p2 = *(STabObj**)pRight;
int32_t ret = p1->gid.vgId - p2->gid.vgId;
if (ret == 0) {
return ret;
} else {
return ret > 0 ? 1 : -1;
}
}
// comparator for STabObj pointers, ordered by meter uid (monotonically increasing)
static int32_t tabObjPointerComparator(const void* pLeft, const void* pRight) {
int64_t ret = (*(STabObj**)(pLeft))->uid - (*(STabObj**)(pRight))->uid;
if (ret == 0) {
return 0;
} else {
return ret > 0 ? 1 : -1;
}
}
static int32_t tabObjResultComparator(const void* p1, const void* p2, void* param) {
tOrderDescriptor* pOrderDesc = (tOrderDescriptor*)param;
STabObj* pNode1 = (STabObj*)p1;
STabObj* pNode2 = (STabObj*)p2;
for (int32_t i = 0; i < pOrderDesc->orderIdx.numOfCols; ++i) {
int32_t colIdx = pOrderDesc->orderIdx.pData[i];
char* f1 = NULL;
char* f2 = NULL;
SSchema schema = {0};
if (colIdx == -1) {
f1 = pNode1->meterId;
f2 = pNode2->meterId;
schema.type = TSDB_DATA_TYPE_BINARY;
schema.bytes = TSDB_TABLE_ID_LEN;
} else {
f1 = mgmtMeterGetTag(pNode1, colIdx, NULL);
f2 = mgmtMeterGetTag(pNode2, colIdx, &schema);
SSchema* pSchema = getColumnModelSchema(pOrderDesc->pColumnModel, colIdx);
assert(schema.type == pSchema->type);
}
int32_t ret = doCompare(f1, f2, schema.type, schema.bytes);
if (ret == 0) {
continue;
} else {
return ret;
}
}
return 0;
}
/**
* update the tag order index according to the tag column indexes. Each column index needs to be checked one by one,
* since normal columns may also be passed to the server to handle group by on a status column.
*
* @param pSuperTableMetaMsg
* @param tableIndex
* @param pOrderIndexInfo
* @param numOfTags
*/
static void mgmtUpdateOrderTagColIndex(SSuperTableMetaMsg* pSuperTableMetaMsg, int32_t tableIndex, SColumnOrderInfo* pOrderIndexInfo,
int32_t numOfTags) {
SMetricMetaElemMsg* pElem = (SMetricMetaElemMsg*)((char*)pSuperTableMetaMsg + pSuperTableMetaMsg->metaElem[tableIndex]);
SColIndexEx* groupColumnList = (SColIndexEx*)((char*)pSuperTableMetaMsg + pElem->groupbyTagColumnList);
int32_t numOfGroupbyTags = 0;
for (int32_t i = 0; i < pElem->numOfGroupCols; ++i) {
if (groupColumnList[i].flag == TSDB_COL_TAG) { // ignore this column if it is not a tag column.
pOrderIndexInfo->pData[numOfGroupbyTags++] = groupColumnList[i].colIdx;
assert(groupColumnList[i].colIdx < numOfTags);
}
}
pOrderIndexInfo->numOfCols = numOfGroupbyTags;
}
// todo merge sort function with losertree used
void mgmtReorganizeMetersInMetricMeta(SSuperTableMetaMsg* pSuperTableMetaMsg, int32_t tableIndex, tQueryResultset* pRes) {
if (pRes->num <= 0) { // no result, no need for pagination
return;
}
SMetricMetaElemMsg* pElem = (SMetricMetaElemMsg*)((char*)pSuperTableMetaMsg + pSuperTableMetaMsg->metaElem[tableIndex]);
STabObj* pMetric = mgmtGetTable(pElem->meterId);
SSchema* pTagSchema = (SSchema*)(pMetric->schema + pMetric->numOfColumns * sizeof(SSchema));
/*
* To apply the group limitation and group offset, we should sort the result
* list according to the order condition
*/
tOrderDescriptor* descriptor =
(tOrderDescriptor*)calloc(1, sizeof(tOrderDescriptor) + sizeof(int32_t) * pElem->numOfGroupCols);
descriptor->pColumnModel = createColumnModel(pTagSchema, pMetric->numOfTags, 1);
descriptor->orderIdx.numOfCols = pElem->numOfGroupCols;
int32_t* startPos = NULL;
int32_t numOfSubset = 1;
mgmtUpdateOrderTagColIndex(pSuperTableMetaMsg, tableIndex, &descriptor->orderIdx, pMetric->numOfTags);
if (descriptor->orderIdx.numOfCols > 0) {
tQSortEx(pRes->pRes, POINTER_BYTES, 0, pRes->num - 1, descriptor, tabObjResultComparator);
startPos = calculateSubGroup(pRes->pRes, pRes->num, &numOfSubset, descriptor, tabObjResultComparator);
} else {
startPos = malloc(2 * sizeof(int32_t));
startPos[0] = 0;
startPos[1] = (int32_t)pRes->num;
}
/*
* sort the result according to vgid to ensure that meters with the same vgid are
* contiguous in the result list
*/
qsort(pRes->pRes, (size_t)pRes->num, POINTER_BYTES, tabObjVGIDComparator);
free(descriptor->pColumnModel);
free(descriptor);
free(startPos);
}
static void mgmtRetrieveByMeterName(tQueryResultset* pRes, char* str, STabObj* pMetric) {
const char* sep = ",";
char* pToken = NULL;
int32_t s = 4; // initial size
pRes->pRes = malloc(sizeof(char*) * s);
pRes->num = 0;
for (pToken = strsep(&str, sep); pToken != NULL; pToken = strsep(&str, sep)) {
STabObj* pMeterObj = mgmtGetTable(pToken);
if (pMeterObj == NULL) {
mWarn("metric:%s error in metric query expression, invalid meter id:%s", pMetric->meterId, pToken);
continue;
}
if (pRes->num >= s) {
s += (s >> 1); // increase 50% size
pRes->pRes = realloc(pRes->pRes, sizeof(char*) * s);
}
/* not a table created from metric, ignore */
if (pMeterObj->tableType != TSDB_TABLE_TYPE_CHILD_TABLE) {
continue;
}
/*
* the queried meter does not belong to this metric, so ignore it; the metric does not
* carry a uid here, so compare according to meterId as well
*/
STabObj* parentMetric = mgmtGetTable(pMeterObj->pTagData);
if (strncasecmp(parentMetric->meterId, pMetric->meterId, TSDB_TABLE_ID_LEN) != 0 ||
(parentMetric->uid != pMetric->uid)) {
continue;
}
pRes->pRes[pRes->num++] = pMeterObj;
}
}
static bool mgmtTablenameFilterCallback(tSkipListNode* pNode, void* param) {
SMeterNameFilterSupporter* pSupporter = (SMeterNameFilterSupporter*)param;
char name[TSDB_TABLE_ID_LEN] = {0};
// pattern compare for meter name
STabObj* pMeterObj = (STabObj*)pNode->pData;
extractTableName(pMeterObj->meterId, name);
return patternMatch(pSupporter->pattern, name, TSDB_TABLE_ID_LEN, &pSupporter->info) == TSDB_PATTERN_MATCH;
}
static void mgmtRetrieveFromLikeOptr(tQueryResultset* pRes, const char* str, STabObj* pMetric) {
SPatternCompareInfo info = PATTERN_COMPARE_INFO_INITIALIZER;
SMeterNameFilterSupporter supporter = {info, (char*) str};
pRes->num =
tSkipListIterateList(pMetric->pSkipList, (tSkipListNode***)&pRes->pRes, mgmtTablenameFilterCallback, &supporter);
}
static void mgmtFilterByTableNameCond(tQueryResultset* pRes, char* condStr, int32_t len, STabObj* pMetric) {
pRes->num = 0;
if (len <= 0) {
return;
}
char* str = calloc(1, (size_t)len + 1);
memcpy(str, condStr, len);
if (strncasecmp(condStr, QUERY_COND_REL_PREFIX_IN, QUERY_COND_REL_PREFIX_IN_LEN) == 0) { // handle in expression
mgmtRetrieveByMeterName(pRes, str + QUERY_COND_REL_PREFIX_IN_LEN, pMetric);
} else { // handle like expression
assert(strncasecmp(str, QUERY_COND_REL_PREFIX_LIKE, QUERY_COND_REL_PREFIX_LIKE_LEN) == 0);
mgmtRetrieveFromLikeOptr(pRes, str + QUERY_COND_REL_PREFIX_LIKE_LEN, pMetric);
tansformQueryResult(pRes);
}
free(str);
}
UNUSED_FUNC static bool mgmtJoinFilterCallback(tSkipListNode* pNode, void* param) {
SJoinSupporter* pSupporter = (SJoinSupporter*)param;
SSchema s = {0};
char* v = mgmtTableGetTag((STabObj*)pNode->pData, pSupporter->colIndex, &s);
for (int32_t i = 0; i < pSupporter->size; ++i) {
int32_t ret = doCompare(v, pSupporter->val[i], pSupporter->type, s.bytes);
if (ret == 0) {
pSupporter->qualMeterObj[pSupporter->qualSize++] = pSupporter->pTabObjs[i];
/*
* Once a value is qualified according to the join condition, it is removed from the
* candidate list, along with its corresponding meter object.
*
* The last element does not need to be moved forward.
*/
if (i < pSupporter->size - 1) {
memmove(pSupporter->val[i], pSupporter->val[i + 1], pSupporter->size - (i + 1));
}
pSupporter->size -= 1;
return true;
}
}
return false;
}
static void orderResult(SSuperTableMetaMsg* pSuperTableMetaMsg, tQueryResultset* pRes, int16_t colIndex, int32_t tableIndex) {
SMetricMetaElemMsg* pElem = (SMetricMetaElemMsg*)((char*)pSuperTableMetaMsg + pSuperTableMetaMsg->metaElem[tableIndex]);
tOrderDescriptor* descriptor =
(tOrderDescriptor*)calloc(1, sizeof(tOrderDescriptor) + sizeof(int32_t) * 1); // only one column for join
STabObj* pMetric = mgmtGetTable(pElem->meterId);
SSchema* pTagSchema = (SSchema*)(pMetric->schema + pMetric->numOfColumns * sizeof(SSchema));
descriptor->pColumnModel = createColumnModel(pTagSchema, pMetric->numOfTags, 1);
descriptor->orderIdx.pData[0] = colIndex;
descriptor->orderIdx.numOfCols = 1;
// sort results list
tQSortEx(pRes->pRes, POINTER_BYTES, 0, pRes->num - 1, descriptor, tabObjResultComparator);
free(descriptor->pColumnModel);
free(descriptor);
}
// check for duplicate join tags
static int32_t mgmtCheckForDuplicateTagValue(tQueryResultset* pRes, int32_t index, int32_t tagCol) {
SSchema s = {0};
for (int32_t k = 1; k < pRes[index].num; ++k) {
STabObj* pObj1 = pRes[index].pRes[k - 1];
STabObj* pObj2 = pRes[index].pRes[k];
char* val1 = mgmtTableGetTag(pObj1, tagCol, &s);
char* val2 = mgmtTableGetTag(pObj2, tagCol, NULL);
if (doCompare(val1, val2, s.type, s.bytes) == 0) {
return TSDB_CODE_DUPLICATE_TAGS;
}
}
return TSDB_CODE_SUCCESS;
}
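// sort-merge join of the two result sets on the join tag columns carried in the message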
int32_t mgmtDoJoin(SSuperTableMetaMsg* pSuperTableMetaMsg, tQueryResultset* pRes) {
if (pSuperTableMetaMsg->numOfMeters == 1) {
return TSDB_CODE_SUCCESS;
}
bool allEmpty = false;
for (int32_t i = 0; i < pSuperTableMetaMsg->numOfMeters; ++i) {
if (pRes[i].num == 0) { // all results are empty if one of them is empty
allEmpty = true;
break;
}
}
if (allEmpty) {
for (int32_t i = 0; i < pSuperTableMetaMsg->numOfMeters; ++i) {
pRes[i].num = 0;
tfree(pRes[i].pRes);
}
return TSDB_CODE_SUCCESS;
}
char* cond = (char*)pSuperTableMetaMsg + pSuperTableMetaMsg->join;
char left[TSDB_TABLE_ID_LEN + 1] = {0};
strcpy(left, cond);
int16_t leftTagColIndex = *(int16_t*)(cond + TSDB_TABLE_ID_LEN);
char right[TSDB_TABLE_ID_LEN + 1] = {0};
strcpy(right, cond + TSDB_TABLE_ID_LEN + sizeof(int16_t));
int16_t rightTagColIndex = *(int16_t*)(cond + TSDB_TABLE_ID_LEN * 2 + sizeof(int16_t));
STabObj* pLeftMetric = mgmtGetTable(left);
STabObj* pRightMetric = mgmtGetTable(right);
// decide which metric each result set belongs to
int32_t leftIndex = 0;
int32_t rightIndex = 0;
for (int32_t i = 0; i < pSuperTableMetaMsg->numOfMeters; ++i) {
STabObj* pObj = (STabObj*)pRes[i].pRes[0];
STabObj* pMetric1 = mgmtGetTable(pObj->pTagData);
if (pMetric1 == pLeftMetric) {
leftIndex = i;
} else if (pMetric1 == pRightMetric) {
rightIndex = i;
}
}
orderResult(pSuperTableMetaMsg, &pRes[leftIndex], leftTagColIndex, leftIndex);
orderResult(pSuperTableMetaMsg, &pRes[rightIndex], rightTagColIndex, rightIndex);
int32_t i = 0;
int32_t j = 0;
SSchema s = {0};
int32_t res = 0;
// check for duplicated tag values
int32_t ret1 = mgmtCheckForDuplicateTagValue(pRes, leftIndex, leftTagColIndex);
int32_t ret2 = mgmtCheckForDuplicateTagValue(pRes, rightIndex, rightTagColIndex);
if (ret1 != TSDB_CODE_SUCCESS || ret2 != TSDB_CODE_SUCCESS) {
return (ret1 != TSDB_CODE_SUCCESS) ? ret1 : ret2; // return whichever check found duplicate tags
}
while (i < pRes[leftIndex].num && j < pRes[rightIndex].num) {
STabObj* pLeftObj = pRes[leftIndex].pRes[i];
STabObj* pRightObj = pRes[rightIndex].pRes[j];
char* v1 = mgmtTableGetTag(pLeftObj, leftTagColIndex, &s);
char* v2 = mgmtTableGetTag(pRightObj, rightTagColIndex, NULL);
int32_t ret = doCompare(v1, v2, s.type, s.bytes);
if (ret == 0) { // qualified
pRes[leftIndex].pRes[res] = pRes[leftIndex].pRes[i++];
pRes[rightIndex].pRes[res] = pRes[rightIndex].pRes[j++];
res++;
} else if (ret < 0) {
i++;
} else {
j++;
}
}
pRes[leftIndex].num = res;
pRes[rightIndex].num = res;
return TSDB_CODE_SUCCESS;
}
/**
* convert the result pointer to STabObj instead of tSkipListNode
* @param pRes
*/
static void tansformQueryResult(tQueryResultset* pRes) {
if (pRes == NULL || pRes->num == 0) {
return;
}
for (int32_t i = 0; i < pRes->num; ++i) {
pRes->pRes[i] = ((tSkipListNode*)(pRes->pRes[i]))->pData;
}
}
static tQueryResultset* doNestedLoopIntersect(tQueryResultset* pRes1, tQueryResultset* pRes2) {
int32_t num = 0;
void** pResult = pRes1->pRes;
for (int32_t i = 0; i < pRes1->num; ++i) {
for (int32_t j = 0; j < pRes2->num; ++j) {
if (pRes1->pRes[i] == pRes2->pRes[j]) {
pResult[num++] = pRes1->pRes[i];
break;
}
}
}
tQueryResultClean(pRes2);
memset(pRes1->pRes + num, 0, sizeof(void*) * (pRes1->num - num));
pRes1->num = num;
return pRes1;
}
static tQueryResultset* doSortIntersect(tQueryResultset* pRes1, tQueryResultset* pRes2) {
size_t sizePtr = sizeof(void *);
qsort(pRes1->pRes, pRes1->num, sizePtr, tabObjPointerComparator);
qsort(pRes2->pRes, pRes2->num, sizePtr, tabObjPointerComparator);
int32_t i = 0;
int32_t j = 0;
int32_t num = 0;
while (i < pRes1->num && j < pRes2->num) {
if (pRes1->pRes[i] == pRes2->pRes[j]) {
j++;
pRes1->pRes[num++] = pRes1->pRes[i++];
} else if (pRes1->pRes[i] < pRes2->pRes[j]) {
i++;
} else {
j++;
}
}
tQueryResultClean(pRes2);
memset(pRes1->pRes + num, 0, sizeof(void*) * (pRes1->num - num));
pRes1->num = num;
return pRes1;
}
static void queryResultIntersect(tQueryResultset* pFinalRes, tQueryResultset* pRes) {
const int32_t NUM_OF_RES_THRESHOLD = 20;
// for small result, use nested loop join
if (pFinalRes->num <= NUM_OF_RES_THRESHOLD && pRes->num <= NUM_OF_RES_THRESHOLD) {
doNestedLoopIntersect(pFinalRes, pRes);
} else { // for larger result, sort merge is employed
doSortIntersect(pFinalRes, pRes);
}
}
static void queryResultUnion(tQueryResultset* pFinalRes, tQueryResultset* pRes) {
if (pRes->num == 0) {
tQueryResultClean(pRes);
return;
}
int32_t total = pFinalRes->num + pRes->num;
void* tmp = realloc(pFinalRes->pRes, total * POINTER_BYTES);
if (tmp == NULL) {
return;
}
pFinalRes->pRes = tmp;
memcpy(&pFinalRes->pRes[pFinalRes->num], pRes->pRes, POINTER_BYTES * pRes->num);
qsort(pFinalRes->pRes, total, POINTER_BYTES, tabObjPointerComparator);
int32_t num = 1;
for (int32_t i = 1; i < total; ++i) {
if (pFinalRes->pRes[i] != pFinalRes->pRes[i - 1]) {
pFinalRes->pRes[num++] = pFinalRes->pRes[i];
}
}
if (num < total) {
memset(&pFinalRes->pRes[num], 0, POINTER_BYTES * (total - num));
}
pFinalRes->num = num;
tQueryResultClean(pRes);
}
static int32_t compareIntVal(const void* pLeft, const void* pRight) {
DEFAULT_COMP(GET_INT64_VAL(pLeft), GET_INT64_VAL(pRight));
}
static int32_t compareIntDoubleVal(const void* pLeft, const void* pRight) {
DEFAULT_COMP(GET_INT64_VAL(pLeft), GET_DOUBLE_VAL(pRight));
}
static int32_t compareDoubleVal(const void* pLeft, const void* pRight) {
DEFAULT_COMP(GET_DOUBLE_VAL(pLeft), GET_DOUBLE_VAL(pRight));
}
static int32_t compareDoubleIntVal(const void* pLeft, const void* pRight) {
double ret = (*(double*)pLeft) - (*(int64_t*)pRight);
if (fabs(ret) < DBL_EPSILON) {
return 0;
} else {
return ret > 0 ? 1 : -1;
}
}
static int32_t compareStrVal(const void* pLeft, const void* pRight) {
int32_t ret = strcmp(pLeft, pRight);
if (ret == 0) {
return 0;
} else {
return ret > 0 ? 1 : -1;
}
}
static int32_t compareWStrVal(const void* pLeft, const void* pRight) {
int32_t ret = wcscmp(pLeft, pRight);
if (ret == 0) {
return 0;
} else {
return ret > 0 ? 1 : -1;
}
}
static int32_t compareStrPatternComp(const void* pLeft, const void* pRight) {
SPatternCompareInfo pInfo = {'%', '_'};
const char* pattern = pRight;
const char* str = pLeft;
int32_t ret = patternMatch(pattern, str, strlen(str), &pInfo);
return (ret == TSDB_PATTERN_MATCH) ? 0 : 1;
}
static int32_t compareWStrPatternComp(const void* pLeft, const void* pRight) {
SPatternCompareInfo pInfo = {'%', '_'};
const wchar_t* pattern = pRight;
const wchar_t* str = pLeft;
int32_t ret = WCSPatternMatch(pattern, str, wcslen(str), &pInfo);
return (ret == TSDB_PATTERN_MATCH) ? 0 : 1;
}
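// select the comparator according to the tag column type, the filter value type and the relational operator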
static __compar_fn_t getFilterComparator(int32_t type, int32_t filterType, int32_t optr) {
__compar_fn_t comparator = NULL;
switch (type) {
case TSDB_DATA_TYPE_TINYINT:
case TSDB_DATA_TYPE_SMALLINT:
case TSDB_DATA_TYPE_INT:
case TSDB_DATA_TYPE_BIGINT:
case TSDB_DATA_TYPE_BOOL: {
if (filterType >= TSDB_DATA_TYPE_BOOL && filterType <= TSDB_DATA_TYPE_BIGINT) {
comparator = compareIntVal;
} else if (filterType >= TSDB_DATA_TYPE_FLOAT && filterType <= TSDB_DATA_TYPE_DOUBLE) {
comparator = compareIntDoubleVal;
}
break;
}
case TSDB_DATA_TYPE_FLOAT:
case TSDB_DATA_TYPE_DOUBLE: {
if (filterType >= TSDB_DATA_TYPE_BOOL && filterType <= TSDB_DATA_TYPE_BIGINT) {
comparator = compareDoubleIntVal;
} else if (filterType >= TSDB_DATA_TYPE_FLOAT && filterType <= TSDB_DATA_TYPE_DOUBLE) {
comparator = compareDoubleVal;
}
break;
}
case TSDB_DATA_TYPE_BINARY: {
assert(filterType == TSDB_DATA_TYPE_BINARY);
if (optr == TSDB_RELATION_LIKE) { /* wildcard query using like operator */
comparator = compareStrPatternComp;
} else { /* normal relational comparator */
comparator = compareStrVal;
}
break;
}
case TSDB_DATA_TYPE_NCHAR: {
assert(filterType == TSDB_DATA_TYPE_NCHAR);
if (optr == TSDB_RELATION_LIKE) {
comparator = compareWStrPatternComp;
} else {
comparator = compareWStrVal;
}
break;
}
default:
comparator = compareIntVal;
break;
}
return comparator;
}
static void getTagColumnInfo(SSyntaxTreeFilterSupporter* pSupporter, SSchema* pSchema, int32_t* index,
int32_t* offset) {
*index = 0;
*offset = 0;
// filter on table name(TBNAME)
if (strcasecmp(pSchema->name, TSQL_TBNAME_L) == 0) {
*index = TSDB_TBNAME_COLUMN_INDEX;
*offset = TSDB_TBNAME_COLUMN_INDEX;
return;
}
while ((*index) < pSupporter->numOfTags) {
if (pSupporter->pTagSchema[*index].bytes == pSchema->bytes &&
pSupporter->pTagSchema[*index].type == pSchema->type &&
strcmp(pSupporter->pTagSchema[*index].name, pSchema->name) == 0) {
break;
} else {
(*offset) += pSupporter->pTagSchema[(*index)++].bytes;
}
}
}
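// fill the tQueryInfo of an expression node: locate the tag column, pick the comparator and bind the condition value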
void filterPrepare(void* expr, void* param) {
tSQLBinaryExpr *pExpr = (tSQLBinaryExpr*) expr;
if (pExpr->info != NULL) {
return;
}
int32_t i = 0, offset = 0;
pExpr->info = calloc(1, sizeof(tQueryInfo));
tQueryInfo* pInfo = pExpr->info;
SSyntaxTreeFilterSupporter* pSupporter = (SSyntaxTreeFilterSupporter*)param;
tVariant* pCond = pExpr->pRight->pVal;
SSchema* pSchema = pExpr->pLeft->pSchema;
getTagColumnInfo(pSupporter, pSchema, &i, &offset);
assert((i >= 0 && i < TSDB_MAX_TAGS) || (i == TSDB_TBNAME_COLUMN_INDEX));
assert((offset >= 0 && offset < TSDB_MAX_TAGS_LEN) || (offset == TSDB_TBNAME_COLUMN_INDEX));
pInfo->sch = *pSchema;
pInfo->colIdx = i;
pInfo->optr = pExpr->nSQLBinaryOptr;
pInfo->offset = offset;
pInfo->compare = getFilterComparator(pSchema->type, pCond->nType, pInfo->optr);
tVariantAssign(&pInfo->q, pCond);
tVariantTypeSetType(&pInfo->q, pInfo->sch.type);
}
void tSQLListTraverseDestroyInfo(void* param) {
if (param == NULL) {
return;
}
tQueryInfo* pInfo = (tQueryInfo*)param;
tVariantDestroy(&(pInfo->q));
free(param);
}
static int32_t mgmtFilterMeterByIndex(STabObj* pMetric, tQueryResultset* pRes, char* pCond, int32_t condLen) {
SSchema* pTagSchema = (SSchema*)(pMetric->schema + pMetric->numOfColumns * sizeof(SSchema));
tSQLBinaryExpr* pExpr = NULL;
tSQLBinaryExprFromString(&pExpr, pTagSchema, pMetric->numOfTags, pCond, condLen);
// failed to build expression, no result, return immediately
if (pExpr == NULL) {
mError("metric:%s, no result returned, error in super table query expression:%s", pMetric->meterId, pCond);
tfree(pCond);
return TSDB_CODE_OPS_NOT_SUPPORT;
} else { // query according to the binary expression
SSyntaxTreeFilterSupporter s = {.pTagSchema = pTagSchema, .numOfTags = pMetric->numOfTags};
SBinaryFilterSupp supp = {.fp = (__result_filter_fn_t)tSkipListNodeFilterCallback,
.setupInfoFn = (__do_filter_suppl_fn_t)filterPrepare,
.pExtInfo = &s};
// tSQLBinaryExprTraverse(pExpr, pMetric->pSkipList, pRes, &supp);
tSQLBinaryExprDestroy(&pExpr, tSQLListTraverseDestroyInfo);
}
tansformQueryResult(pRes);
return TSDB_CODE_SUCCESS;
}
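/*
 * Retrieve the meters of a super table that satisfy the table-name condition and/or the tag
 * condition, combining the two intermediate result sets with AND (intersect) or OR (union).
 */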
int32_t mgmtRetrieveMetersFromSuperTable(SSuperTableMetaMsg* pMsg, int32_t tableIndex, tQueryResultset* pRes) {
SMetricMetaElemMsg* pElem = (SMetricMetaElemMsg*)((char*)pMsg + pMsg->metaElem[tableIndex]);
STabObj* pMetric = mgmtGetTable(pElem->meterId);
char* pCond = NULL;
char* tmpTableNameCond = NULL;
// no table created in accordance with this metric.
if (pMetric->pSkipList == NULL || pMetric->pSkipList->nSize == 0) {
assert(pMetric->numOfMeters == 0);
return TSDB_CODE_SUCCESS;
}
char* pQueryCond = (char*)pMsg + pElem->cond;
int32_t condLen = pElem->condLen;
// convert the unicode (UCS-4) condition string to an mbs string
if (condLen > 0) {
pCond = calloc(1, (condLen + 1) * TSDB_NCHAR_SIZE);
taosUcs4ToMbs(pQueryCond, condLen * TSDB_NCHAR_SIZE, pCond);
condLen = strlen(pCond) + 1;
mTrace("metric:%s len:%d, metric query condition:%s", pMetric->meterId, condLen, pCond);
}
char* tablenameCond = (char*)pMsg + pElem->tableCond;
if (pElem->tableCondLen > 0) {
tmpTableNameCond = calloc(1, pElem->tableCondLen + 1);
strncpy(tmpTableNameCond, tablenameCond, pElem->tableCondLen);
mTrace("metric:%s rel:%d, len:%d, table name cond:%s", pMetric->meterId, pElem->rel, pElem->tableCondLen,
tmpTableNameCond);
}
if (pElem->tableCondLen > 0 || condLen > 0) {
mgmtFilterByTableNameCond(pRes, tmpTableNameCond, pElem->tableCondLen, pMetric);
bool noNextCal = (pRes->num == 0 && pElem->rel == TSDB_RELATION_AND); // no need to calculate next result
if (!noNextCal && condLen > 0) {
tQueryResultset filterRes = {0};
int32_t ret = mgmtFilterMeterByIndex(pMetric, &filterRes, pCond, condLen);
if (ret != TSDB_CODE_SUCCESS) {
tfree(pCond);
tfree(tmpTableNameCond);
return ret;
}
// union or intersect of two results
assert(pElem->rel == TSDB_RELATION_AND || pElem->rel == TSDB_RELATION_OR);
if (pElem->rel == TSDB_RELATION_AND) {
if (filterRes.num == 0 || pRes->num == 0) { // intersect two sets
tQueryResultClean(pRes);
} else {
queryResultIntersect(pRes, &filterRes);
}
} else { // union two sets
queryResultUnion(pRes, &filterRes);
}
tQueryResultClean(&filterRes);
}
} else {
mTrace("metric:%s retrieve all meter, no query condition", pMetric->meterId);
pRes->num = tSkipListIterateList(pMetric->pSkipList, (tSkipListNode***)&pRes->pRes, NULL, NULL);
tansformQueryResult(pRes);
}
tfree(pCond);
tfree(tmpTableNameCond);
mTrace("metric:%s numOfRes:%d", pMetric->meterId, pRes->num);
return TSDB_CODE_SUCCESS;
}
// todo refactor!!!!!
static char* getTagValueFromMeter(STabObj* pTable, int32_t offset, int32_t len, char* param) {
if (offset == TSDB_TBNAME_COLUMN_INDEX) {
extractTableName(pTable->meterId, param);
} else {
char* tags = pTable->pTagData + offset + TSDB_TABLE_ID_LEN; // tag start position
memcpy(param, tags, len); // make sure the value is null-terminated string
}
return param;
}
bool tSkipListNodeFilterCallback(const void* pNode, void* param) {
tQueryInfo* pInfo = (tQueryInfo*)param;
STabObj* pTable = (STabObj*)(((tSkipListNode*)pNode)->pData);
char buf[TSDB_MAX_TAGS_LEN] = {0};
char* val = getTagValueFromMeter(pTable, pInfo->offset, pInfo->sch.bytes, buf);
int8_t type = pInfo->sch.type;
int32_t ret = 0;
if (pInfo->q.nType == TSDB_DATA_TYPE_BINARY || pInfo->q.nType == TSDB_DATA_TYPE_NCHAR) {
ret = pInfo->compare(val, pInfo->q.pz);
} else {
tVariant t = {0};
tVariantCreateFromBinary(&t, val, (uint32_t) pInfo->sch.bytes, type);
ret = pInfo->compare(&t.i64Key, &pInfo->q.i64Key);
}
switch (pInfo->optr) {
case TSDB_RELATION_EQUAL: {
return ret == 0;
}
case TSDB_RELATION_NOT_EQUAL: {
return ret != 0;
}
case TSDB_RELATION_LARGE_EQUAL: {
return ret >= 0;
}
case TSDB_RELATION_LARGE: {
return ret > 0;
}
case TSDB_RELATION_LESS_EQUAL: {
return ret <= 0;
}
case TSDB_RELATION_LESS: {
return ret < 0;
}
case TSDB_RELATION_LIKE: {
return ret == 0;
}
default:
assert(false);
}
return true;
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "qast.h"
#include "qextbuffer.h"
#include "taosdef.h"
#include "taosmsg.h"
#include "tlog.h"
#include "tutil.h"
#include "vnodeTagMgmt.h"
#define GET_TAG_VAL_POINTER(s, col, sc, t) ((t *)(&((s)->tags[getColumnModelOffset(sc, col)])))
#define GET_TAG_VAL(s, col, sc, t) (*GET_TAG_VAL_POINTER(s, col, sc, t))
static void tTagsPrints(SMeterSidExtInfo *pMeterInfo, SColumnModel *pSchema, SColumnOrderInfo *pOrder);
static void tSidSetDisplay(tSidSet *pSets);
//todo merge with losertree_compar/ext_comp
int32_t doCompare(char* f1, char* f2, int32_t type, int32_t size) {
switch (type) {
case TSDB_DATA_TYPE_INT: DEFAULT_COMP(GET_INT32_VAL(f1), GET_INT32_VAL(f2));
case TSDB_DATA_TYPE_DOUBLE: DEFAULT_COMP(GET_DOUBLE_VAL(f1), GET_DOUBLE_VAL(f2));
case TSDB_DATA_TYPE_FLOAT: DEFAULT_COMP(GET_FLOAT_VAL(f1), GET_FLOAT_VAL(f2));
case TSDB_DATA_TYPE_BIGINT: DEFAULT_COMP(GET_INT64_VAL(f1), GET_INT64_VAL(f2));
case TSDB_DATA_TYPE_SMALLINT: DEFAULT_COMP(GET_INT16_VAL(f1), GET_INT16_VAL(f2));
case TSDB_DATA_TYPE_TINYINT:
case TSDB_DATA_TYPE_BOOL: DEFAULT_COMP(GET_INT8_VAL(f1), GET_INT8_VAL(f2));
case TSDB_DATA_TYPE_NCHAR: {
int32_t ret = wcsncmp((wchar_t*) f1, (wchar_t*) f2, size/TSDB_NCHAR_SIZE);
if (ret == 0) {
return ret;
}
return (ret < 0) ? -1 : 1;
}
default: {
int32_t ret = strncmp(f1, f2, (size_t)size);
if (ret == 0) {
return ret;
}
return (ret < 0) ? -1 : 1;
}
}
}
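/*
 * Illustrative sketch (not part of the original source): how a caller might invoke
 * doCompare() on two raw values of the same type. exampleCompareInt32 is a
 * hypothetical name; it simply passes pointers to the fixed-size representations.
 */
static int32_t UNUSED_FUNC exampleCompareInt32(int32_t a, int32_t b) {
  // returns -1/0/1, exactly like the DEFAULT_COMP branches above
  return doCompare((char *)&a, (char *)&b, TSDB_DATA_TYPE_INT, sizeof(int32_t));
}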
int32_t meterSidComparator(const void *p1, const void *p2, void *param) {
tOrderDescriptor *pOrderDesc = (tOrderDescriptor *)param;
SMeterSidExtInfo *s1 = (SMeterSidExtInfo *)p1;
SMeterSidExtInfo *s2 = (SMeterSidExtInfo *)p2;
for (int32_t i = 0; i < pOrderDesc->orderIdx.numOfCols; ++i) {
int32_t colIdx = pOrderDesc->orderIdx.pData[i];
char * f1 = NULL;
char * f2 = NULL;
int32_t type = 0;
int32_t bytes = 0;
if (colIdx == -1) {
f1 = s1->tags;
f2 = s2->tags;
type = TSDB_DATA_TYPE_BINARY;
bytes = TSDB_METER_NAME_LEN;
} else {
f1 = GET_TAG_VAL_POINTER(s1, colIdx, pOrderDesc->pColumnModel, char);
f2 = GET_TAG_VAL_POINTER(s2, colIdx, pOrderDesc->pColumnModel, char);
SSchema *pSchema = getColumnModelSchema(pOrderDesc->pColumnModel, colIdx);
type = pSchema->type;
bytes = pSchema->bytes;
}
int32_t ret = doCompare(f1, f2, type, bytes);
if (ret == 0) {
continue;
} else {
return ret;
}
}
return 0;
}
static void median(void **pMeterSids, size_t size, int32_t s1, int32_t s2, tOrderDescriptor *pOrderDesc,
__ext_compar_fn_t compareFn) {
int32_t midIdx = ((s2 - s1) >> 1) + s1;
if (compareFn(pMeterSids[midIdx], pMeterSids[s1], pOrderDesc) == 1) {
tsDataSwap(&pMeterSids[midIdx], &pMeterSids[s1], TSDB_DATA_TYPE_BINARY, size);
}
if (compareFn(pMeterSids[midIdx], pMeterSids[s2], pOrderDesc) == 1) {
tsDataSwap(&pMeterSids[midIdx], &pMeterSids[s1], TSDB_DATA_TYPE_BINARY, size);
tsDataSwap(&pMeterSids[midIdx], &pMeterSids[s2], TSDB_DATA_TYPE_BINARY, size);
} else if (compareFn(pMeterSids[s1], pMeterSids[s2], pOrderDesc) == 1) {
tsDataSwap(&pMeterSids[s1], &pMeterSids[s2], TSDB_DATA_TYPE_BINARY, size);
}
assert(compareFn(pMeterSids[midIdx], pMeterSids[s1], pOrderDesc) <= 0 &&
compareFn(pMeterSids[s1], pMeterSids[s2], pOrderDesc) <= 0);
#ifdef _DEBUG_VIEW
tTagsPrints(pMeterSids[s1], pOrderDesc->pColumnModel, &pOrderDesc->orderIdx);
tTagsPrints(pMeterSids[midIdx], pOrderDesc->pColumnModel, &pOrderDesc->orderIdx);
tTagsPrints(pMeterSids[s2], pOrderDesc->pColumnModel, &pOrderDesc->orderIdx);
#endif
}
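/*
 * Illustrative sketch (not part of the original source): the median-of-three idea
 * used by median() above, shown on a plain int array under the assumption that the
 * pivot is taken from the low position afterwards. After the three conditional
 * swaps, v[low] holds the median of the sampled low/mid/high values.
 */
static void UNUSED_FUNC exampleMedianOfThree(int *v, int low, int high) {
  int mid = low + ((high - low) >> 1);
  int tmp;
  if (v[mid] > v[low])  { tmp = v[mid]; v[mid] = v[low];  v[low]  = tmp; }
  if (v[low] > v[high]) { tmp = v[low]; v[low] = v[high]; v[high] = tmp; }
  if (v[mid] > v[low])  { tmp = v[mid]; v[mid] = v[low];  v[low]  = tmp; }
  // v[low] is now the median of the three sampled values
}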
static void tInsertSort(void **pMeterSids, size_t size, int32_t startPos, int32_t endPos, void *param,
__ext_compar_fn_t compareFn) {
for (int32_t i = startPos + 1; i <= endPos; ++i) {
for (int32_t j = i; j > startPos; --j) {
if (compareFn(pMeterSids[j], pMeterSids[j - 1], param) == -1) {
tsDataSwap(&pMeterSids[j], &pMeterSids[j - 1], TSDB_DATA_TYPE_BINARY, size);
} else {
break;
}
}
}
}
void tQSortEx(void **pMeterSids, size_t size, int32_t start, int32_t end, void *param, __ext_compar_fn_t compareFn) {
tOrderDescriptor *pOrderDesc = (tOrderDescriptor *)param;
// for short ranges, fall back to insertion sort instead of the quick sort procedure
if (end - start + 1 <= 8) {
tInsertSort(pMeterSids, size, start, end, pOrderDesc, compareFn);
return;
}
median(pMeterSids, size, start, end, pOrderDesc, compareFn);
int32_t s = start, e = end;
int32_t endRightS = end, startLeftS = start;
while (s < e) {
while (e > s) {
int32_t ret = compareFn(pMeterSids[e], pMeterSids[s], pOrderDesc);
if (ret < 0) {
break;
}
/*
* move elements equal to the pivot value to the right end of the list
*/
if (ret == 0 && e != endRightS) {
tsDataSwap(&pMeterSids[e], &pMeterSids[endRightS--], TSDB_DATA_TYPE_BINARY, size);
}
e--;
}
if (e != s) {
tsDataSwap(&pMeterSids[e], &pMeterSids[s], TSDB_DATA_TYPE_BINARY, size);
}
while (s < e) {
int32_t ret = compareFn(pMeterSids[s], pMeterSids[e], pOrderDesc);
if (ret > 0) {
break;
}
if (ret == 0 && s != startLeftS) {
tsDataSwap(&pMeterSids[s], &pMeterSids[startLeftS++], TSDB_DATA_TYPE_BINARY, size);
}
s++;
}
if (e != s) {
tsDataSwap(&pMeterSids[s], &pMeterSids[e], TSDB_DATA_TYPE_BINARY, size);
}
}
int32_t rightPartStart = e + 1;
if (endRightS != end && e < end) {
int32_t left = rightPartStart;
int32_t right = end;
while (right > endRightS && left <= endRightS) {
tsDataSwap(&pMeterSids[left++], &pMeterSids[right--], TSDB_DATA_TYPE_BINARY, size);
}
rightPartStart += (end - endRightS);
}
int32_t leftPartEnd = e - 1;
if (startLeftS != end && s > start) {
int32_t left = start;
int32_t right = leftPartEnd;
while (left < startLeftS && right >= startLeftS) {
tsDataSwap(&pMeterSids[left++], &pMeterSids[right--], TSDB_DATA_TYPE_BINARY, size);
}
leftPartEnd -= (startLeftS - start);
}
if (leftPartEnd > start) {
tQSortEx(pMeterSids, size, start, leftPartEnd, pOrderDesc, compareFn);
}
if (rightPartStart < end) {
tQSortEx(pMeterSids, size, rightPartStart, end, pOrderDesc, compareFn);
}
}
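/*
 * Illustrative sketch (not part of the original source): the net effect of the
 * partitioning in tQSortEx, shown with the simpler Dutch-national-flag scheme on
 * an int array. Elements equal to the pivot end up grouped together, which is what
 * the swaps against endRightS/startLeftS achieve above before the two recursive
 * calls skip over the pivot-equal block. All names here are hypothetical.
 */
static void UNUSED_FUNC exampleThreeWayPartition(int *v, int n, int pivot, int *lt, int *gt) {
  int i = 0;
  *lt = 0;      // v[0 .. *lt-1]   < pivot
  *gt = n - 1;  // v[*gt+1 .. n-1] > pivot
  while (i <= *gt) {
    if (v[i] < pivot) {
      int t = v[i]; v[i] = v[*lt]; v[*lt] = t;
      (*lt)++; i++;
    } else if (v[i] > pivot) {
      int t = v[i]; v[i] = v[*gt]; v[*gt] = t;
      (*gt)--;
    } else {
      i++;      // equal to pivot: leave it in the middle block
    }
  }
}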
int32_t *calculateSubGroup(void **pSids, int32_t numOfMeters, int32_t *numOfSubset, tOrderDescriptor *pOrderDesc,
__ext_compar_fn_t compareFn) {
int32_t *starterPos = (int32_t *)malloc((numOfMeters + 1) * sizeof(int32_t)); // one extra slot for the closing boundary
starterPos[0] = 0;
*numOfSubset = 1;
for (int32_t i = 1; i < numOfMeters; ++i) {
int32_t ret = compareFn(pSids[i - 1], pSids[i], pOrderDesc);
if (ret != 0) {
assert(ret == -1);
starterPos[(*numOfSubset)++] = i;
}
}
starterPos[*numOfSubset] = numOfMeters;
assert(*numOfSubset <= numOfMeters);
return starterPos;
}
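/*
 * Illustrative sketch (not part of the original source): the same boundary
 * computation on an already-sorted int array. "starts" must have n + 1 slots;
 * for {1,1,2,2,2,5} the function returns 3 with starts = {0,2,5,6}, the last
 * entry being the closing sentinel, just like starterPos[*numOfSubset] above.
 */
static int32_t UNUSED_FUNC exampleSubGroupStarts(const int *v, int32_t n, int32_t *starts) {
  int32_t numOfSubset = 1;
  starts[0] = 0;
  for (int32_t i = 1; i < n; ++i) {
    if (v[i] != v[i - 1]) {  // a new key starts a new subgroup
      starts[numOfSubset++] = i;
    }
  }
  starts[numOfSubset] = n;   // closing sentinel
  return numOfSubset;
}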
tSidSet *tSidSetCreate(struct SMeterSidExtInfo **pMeterSidExtInfo, int32_t numOfMeters, SSchema *pSchema,
int32_t numOfTags, SColIndexEx *colList, int32_t numOfCols) {
tSidSet *pSidSet = (tSidSet *)calloc(1, sizeof(tSidSet) + numOfCols * sizeof(int16_t));
if (pSidSet == NULL) {
return NULL;
}
pSidSet->numOfSids = numOfMeters;
pSidSet->pSids = pMeterSidExtInfo;
pSidSet->pColumnModel = createColumnModel(pSchema, numOfTags, 1);
pSidSet->orderIdx.numOfCols = numOfCols;
/*
* in case of "group by tbname,normal_col", the normal_col is ignored
*/
int32_t numOfTagCols = 0;
for(int32_t i = 0; i < numOfCols; ++i) {
if (colList[i].flag == TSDB_COL_TAG) {
pSidSet->orderIdx.pData[numOfTagCols++] = colList[i].colIdx;
}
}
pSidSet->orderIdx.numOfCols = numOfTagCols;
pSidSet->starterPos = NULL;
return pSidSet;
}
void tSidSetDestroy(tSidSet **pSets) {
if ((*pSets) != NULL) {
tfree((*pSets)->starterPos);
tfree((*pSets)->pColumnModel);
(*pSets)->pSids = NULL;
tfree(*pSets);
}
}
void tTagsPrints(SMeterSidExtInfo *pMeterInfo, SColumnModel *pSchema, SColumnOrderInfo *pOrder) {
if (pSchema == NULL) {
return;
}
printf("sid: %-5d tags(", pMeterInfo->sid);
for (int32_t i = 0; i < pOrder->numOfCols; ++i) {
int32_t colIndex = pOrder->pData[i];
// it is the tbname column
if (colIndex == -1) {
printf("%s, ", pMeterInfo->tags);
continue;
}
SSchema* s = getColumnModelSchema(pSchema, colIndex);
switch (s->type) {
case TSDB_DATA_TYPE_INT:
printf("%d, ", GET_TAG_VAL(pMeterInfo, colIndex, pSchema, int32_t));
break;
case TSDB_DATA_TYPE_DOUBLE:
printf("%lf, ", GET_TAG_VAL(pMeterInfo, colIndex, pSchema, double));
break;
case TSDB_DATA_TYPE_FLOAT:
printf("%f, ", GET_TAG_VAL(pMeterInfo, colIndex, pSchema, float));
break;
case TSDB_DATA_TYPE_BIGINT:
printf("%" PRId64 ", ", GET_TAG_VAL(pMeterInfo, colIndex, pSchema, int64_t));
break;
case TSDB_DATA_TYPE_SMALLINT:
printf("%d, ", GET_TAG_VAL(pMeterInfo, colIndex, pSchema, int16_t));
break;
case TSDB_DATA_TYPE_TINYINT:
printf("%d, ", GET_TAG_VAL(pMeterInfo, colIndex, pSchema, int8_t));
break;
case TSDB_DATA_TYPE_BINARY:
printf("%s, ", GET_TAG_VAL_POINTER(pMeterInfo, colIndex, pSchema, char));
break;
case TSDB_DATA_TYPE_NCHAR: {
char *data = GET_TAG_VAL_POINTER(pMeterInfo, colIndex, pSchema, char);
char buffer[512] = {0};
taosUcs4ToMbs(data, s->bytes, buffer);
printf("%s, ", buffer);
break;
}
case TSDB_DATA_TYPE_BOOL:
printf("%d, ", GET_TAG_VAL(pMeterInfo, colIndex, pSchema, int8_t));
break;
default:
assert(false);
}
}
printf(")\n");
}
/*
* display all the subset groups for debug purpose only
*/
static void UNUSED_FUNC tSidSetDisplay(tSidSet *pSets) {
printf("%d meters.\n", pSets->numOfSids);
for (int32_t i = 0; i < pSets->numOfSids; ++i) {
printf("%d\t", pSets->pSids[i]->sid);
}
printf("\n");
printf("total number of subset group is: %d\n", pSets->numOfSubSet);
for (int32_t i = 0; i < pSets->numOfSubSet; ++i) {
int32_t s = pSets->starterPos[i];
int32_t e = pSets->starterPos[i + 1];
printf("the %d-th subgroup: \n", i + 1);
for (int32_t j = s; j < e; ++j) {
tTagsPrints(pSets->pSids[j], pSets->pColumnModel, &pSets->orderIdx);
}
}
}
void tSidSetSort(tSidSet *pSets) {
pTrace("number of meters in sort: %d", pSets->numOfSids);
SColumnOrderInfo *pOrderIdx = &pSets->orderIdx;
if (pOrderIdx->numOfCols == 0 || pSets->numOfSids <= 1 || pSets->pColumnModel == NULL) { // no group by tags clause
pSets->numOfSubSet = 1;
pSets->starterPos = (int32_t *)malloc(sizeof(int32_t) * (pSets->numOfSubSet + 1));
pSets->starterPos[0] = 0;
pSets->starterPos[1] = pSets->numOfSids;
pTrace("all meters belong to one subgroup, no need to subgrouping ops");
#ifdef _DEBUG_VIEW
tSidSetDisplay(pSets);
#endif
} else {
tOrderDescriptor *descriptor =
(tOrderDescriptor *)calloc(1, sizeof(tOrderDescriptor) + sizeof(int16_t) * pSets->orderIdx.numOfCols);
descriptor->pColumnModel = pSets->pColumnModel;
descriptor->orderIdx = pSets->orderIdx;
memcpy(descriptor->orderIdx.pData, pOrderIdx->pData, sizeof(int16_t) * pSets->orderIdx.numOfCols);
tQSortEx((void **)pSets->pSids, POINTER_BYTES, 0, pSets->numOfSids - 1, descriptor, meterSidComparator);
pSets->starterPos =
calculateSubGroup((void **)pSets->pSids, pSets->numOfSids, &pSets->numOfSubSet, descriptor, meterSidComparator);
#ifdef _DEBUG_VIEW
tSidSetDisplay(pSets);
#endif
tfree(descriptor);
}
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _DEFAULT_SOURCE
#include "os.h"
#include "qast.h"
#include "tscUtil.h"
#include "tschemautil.h"
#include "vnode.h"
#include "vnodeDataFilterFunc.h"
#include "vnodeStatus.h"
#include "vnodeUtil.h"
int vnodeCheckFileIntegrity(FILE* fp) {
/*
int savedSessions, savedMeterSize;
fseek(fp, TSDB_FILE_HEADER_LEN/3, SEEK_SET);
fscanf(fp, "%d %d", &savedSessions, &savedMeterSize);
if ( (savedSessions != tsSessionsPerVnode) || (savedMeterSize != tsMeterSizeOnFile) ) {
dError("file structure is changed");
return -1;
}
uint64_t checkSum = 0, savedCheckSum=0;
checkSum = taosGetCheckSum(fp, TSDB_FILE_HEADER_LEN);
fseek(fp, TSDB_FILE_HEADER_LEN - cksumsize, SEEK_SET);
fread(&savedCheckSum, cksumsize, 1, fp);
if ( savedCheckSum != checkSum ) {
dError("check sum is not matched:0x%x 0x%x", checkSum, savedCheckSum);
return -1;
}
*/
return 0;
}
void vnodeCreateFileHeaderFd(int fd) {
char temp[TSDB_FILE_HEADER_LEN / 4];
int lineLen;
lineLen = sizeof(temp);
// write the first line
memset(temp, 0, lineLen);
*(int16_t*)temp = vnodeFileVersion;
sprintf(temp + sizeof(int16_t), "tsdb version: %s\n", version);
/* *((int16_t *)(temp + TSDB_FILE_HEADER_LEN/8)) = vnodeFileVersion; */
lseek(fd, 0, SEEK_SET);
twrite(fd, temp, lineLen);
// second line
memset(temp, 0, lineLen);
twrite(fd, temp, lineLen);
// the third/fourth lines hold the dynamic info
memset(temp, 0, lineLen);
twrite(fd, temp, lineLen);
twrite(fd, temp, lineLen);
}
void vnodeGetHeadFileHeaderInfo(int fd, SVnodeHeadInfo* pHeadInfo) {
lseek(fd, TSDB_FILE_HEADER_LEN / 4, SEEK_SET);
read(fd, pHeadInfo, sizeof(SVnodeHeadInfo));
}
void vnodeUpdateHeadFileHeader(int fd, SVnodeHeadInfo* pHeadInfo) {
lseek(fd, TSDB_FILE_HEADER_LEN / 4, SEEK_SET);
twrite(fd, pHeadInfo, sizeof(SVnodeHeadInfo));
}
void vnodeCreateFileHeader(FILE* fp) {
char temp[TSDB_FILE_HEADER_LEN / 4];
int lineLen;
lineLen = sizeof(temp);
// write the first line
memset(temp, 0, lineLen);
*(int16_t*)temp = vnodeFileVersion;
sprintf(temp + sizeof(int16_t), "tsdb version: %s\n", version);
/* *((int16_t *)(temp + TSDB_FILE_HEADER_LEN/8)) = vnodeFileVersion; */
fseek(fp, 0, SEEK_SET);
fwrite(temp, lineLen, 1, fp);
// second line
memset(temp, 0, lineLen);
fwrite(temp, lineLen, 1, fp);
// the third/fourth lines hold the dynamic info
memset(temp, 0, lineLen);
fwrite(temp, lineLen, 1, fp);
fwrite(temp, lineLen, 1, fp);
}
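/*
 * Illustrative sketch (not part of the original source): reading back the first
 * quarter of the header written above. The assumed layout -- a 16-bit file version
 * followed by a printable "tsdb version" string, each line TSDB_FILE_HEADER_LEN/4
 * bytes -- mirrors the two writers above; exampleReadFileVersion is hypothetical.
 */
static int16_t UNUSED_FUNC exampleReadFileVersion(FILE *fp) {
  char temp[TSDB_FILE_HEADER_LEN / 4] = {0};
  fseek(fp, 0, SEEK_SET);
  if (fread(temp, sizeof(temp), 1, fp) != 1) {
    return -1;  // header shorter than one quarter-size line
  }
  // the human-readable version string starts at temp + sizeof(int16_t)
  return *(int16_t *)temp;
}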
SSqlGroupbyExpr* vnodeCreateGroupbyExpr(SQueryMeterMsg* pQueryMsg, int32_t* code) {
if (pQueryMsg->numOfGroupCols == 0) {
return NULL;
}
// using group by tag columns
SSqlGroupbyExpr* pGroupbyExpr =
(SSqlGroupbyExpr*)malloc(sizeof(SSqlGroupbyExpr) + pQueryMsg->numOfGroupCols * sizeof(SColIndexEx));
if (pGroupbyExpr == NULL) {
*code = TSDB_CODE_SERV_OUT_OF_MEMORY;
return NULL;
}
SColIndexEx* pGroupbyColInfo = (SColIndexEx*)pQueryMsg->groupbyTagIds;
pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols;
pGroupbyExpr->orderType = pQueryMsg->orderType;
pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx;
memcpy(pGroupbyExpr->columnInfo, pGroupbyColInfo, sizeof(SColIndexEx) * pGroupbyExpr->numOfGroupCols);
// TODO: update the colIndexInBuf for each column in group by clause
return pGroupbyExpr;
}
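/*
 * Illustrative sketch (not part of the original source): the single-allocation
 * "header plus trailing array" layout used for SSqlGroupbyExpr above, shown with
 * hypothetical types. One malloc covers both the fixed header and the n trailing
 * items, so a single free() releases everything; <stdlib.h>/<string.h> are assumed
 * to be available through os.h.
 */
typedef struct SExampleGroupby {
  int32_t numOfItems;
  int32_t items[];  // flexible array member, filled right after the header
} SExampleGroupby;

static SExampleGroupby *exampleCreateGroupby(const int32_t *src, int32_t n) {
  SExampleGroupby *p = (SExampleGroupby *)malloc(sizeof(SExampleGroupby) + n * sizeof(int32_t));
  if (p == NULL) {
    return NULL;
  }
  p->numOfItems = n;
  memcpy(p->items, src, n * sizeof(int32_t));
  return p;  // caller releases the header and the trailing items with one free()
}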
static SSchema* toSchema(SQueryMeterMsg* pQuery, SColumnInfo* pCols, int32_t numOfCols) {
char* start = (char*)pQuery->colNameList;
char* end = start;
SSchema* pSchema = calloc(1, sizeof(SSchema) * numOfCols);
for (int32_t i = 0; i < numOfCols; ++i) {
pSchema[i].type = pCols[i].type;
pSchema[i].bytes = pCols[i].bytes;
pSchema[i].colId = pCols[i].colId;
end = strstr(start, ",");
memcpy(pSchema[i].name, start, end - start);
start = end + 1;
}
return pSchema;
}
static int32_t id_compar(const void* left, const void* right) {
DEFAULT_COMP(GET_INT16_VAL(left), GET_INT16_VAL(right));
}
static int32_t vnodeBuildExprFromArithmeticStr(SSqlFunctionExpr* pExpr, SQueryMeterMsg* pQueryMsg) {
SSqlBinaryExprInfo* pBinaryExprInfo = &pExpr->pBinExprInfo;
SColumnInfo* pColMsg = pQueryMsg->colList;
tSQLBinaryExpr* pBinExpr = NULL;
SSchema* pSchema = toSchema(pQueryMsg, pColMsg, pQueryMsg->numOfCols);
dTrace("qmsg:%p create binary expr from string:%s", pQueryMsg, pExpr->pBase.arg[0].argValue.pz);
tSQLBinaryExprFromString(&pBinExpr, pSchema, pQueryMsg->numOfCols, pExpr->pBase.arg[0].argValue.pz);
if (pBinExpr == NULL) {
dError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pExpr->pBase.arg[0].argValue.pz);
return TSDB_CODE_APP_ERROR;
}
pBinaryExprInfo->pBinExpr = pBinExpr;
int32_t num = 0;
int16_t ids[TSDB_MAX_COLUMNS] = {0};
tSQLBinaryExprTrv(pBinExpr, &num, ids);
qsort(ids, num, sizeof(int16_t), id_compar);
int32_t i = 0, j = 0;
while (i < num && j < num) {
if (ids[i] == ids[j]) {
j++;
} else {
ids[++i] = ids[j++];
}
}
assert(i <= num);
// there may be duplicated referenced columns.
num = i + 1;
pBinaryExprInfo->pReqColumns = malloc(sizeof(SColIndexEx) * num);
for (int32_t k = 0; k < num; ++k) {
SColIndexEx* pColIndex = &pBinaryExprInfo->pReqColumns[k];
pColIndex->colId = ids[k];
}
pBinaryExprInfo->numOfCols = num;
free(pSchema);
return TSDB_CODE_SUCCESS;
}
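/*
 * Illustrative sketch (not part of the original source): the sort-and-deduplicate
 * step above isolated into one helper. It reuses id_compar and returns how many
 * distinct column ids remain at the front of the array; exampleDedupColumnIds is
 * a hypothetical name.
 */
static int32_t UNUSED_FUNC exampleDedupColumnIds(int16_t *ids, int32_t num) {
  if (num <= 1) {
    return num;
  }
  qsort(ids, num, sizeof(int16_t), id_compar);
  int32_t distinct = 1;
  for (int32_t j = 1; j < num; ++j) {
    if (ids[j] != ids[distinct - 1]) {  // keep only the first occurrence of each id
      ids[distinct++] = ids[j];
    }
  }
  return distinct;
}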
static int32_t getColumnIndexInSource(SQueryMeterMsg* pQueryMsg, SSqlFuncExprMsg* pExprMsg) {
int32_t j = 0;
while(j < pQueryMsg->numOfCols) {
if (pExprMsg->colInfo.colId == pQueryMsg->colList[j].colId) {
break;
}
j += 1;
}
return j;
}
bool vnodeValidateExprColumnInfo(SQueryMeterMsg* pQueryMsg, SSqlFuncExprMsg* pExprMsg) {
int32_t j = getColumnIndexInSource(pQueryMsg, pExprMsg);
return j < pQueryMsg->numOfCols;
}
SSqlFunctionExpr* vnodeCreateSqlFunctionExpr(SQueryMeterMsg* pQueryMsg, int32_t* code) {
SSqlFunctionExpr* pExprs = (SSqlFunctionExpr*)calloc(1, sizeof(SSqlFunctionExpr) * pQueryMsg->numOfOutputCols);
if (pExprs == NULL) {
tfree(pQueryMsg->pSqlFuncExprs);
*code = TSDB_CODE_SERV_OUT_OF_MEMORY;
return NULL;
}
bool isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType);
int16_t tagLen = 0;
SSchema* pTagSchema = (SSchema*)pQueryMsg->pTagSchema;
for (int32_t i = 0; i < pQueryMsg->numOfOutputCols; ++i) {
pExprs[i].pBase = *((SSqlFuncExprMsg**)pQueryMsg->pSqlFuncExprs)[i];
pExprs[i].resBytes = 0;
int16_t type = 0;
int16_t bytes = 0;
SColIndexEx* pColumnIndexExInfo = &pExprs[i].pBase.colInfo;
// tag column schema is kept in pQueryMsg->pColumnModel
if (TSDB_COL_IS_TAG(pColumnIndexExInfo->flag)) {
if (pColumnIndexExInfo->colIdx >= pQueryMsg->numOfTagsCols) {
*code = TSDB_CODE_INVALID_QUERY_MSG;
tfree(pExprs);
return NULL;
}
type = pTagSchema[pColumnIndexExInfo->colIdx].type;
bytes = pTagSchema[pColumnIndexExInfo->colIdx].bytes;
} else { // parse the arithmetic expression
if (pExprs[i].pBase.functionId == TSDB_FUNC_ARITHM) {
*code = vnodeBuildExprFromArithmeticStr(&pExprs[i], pQueryMsg);
if (*code != TSDB_CODE_SUCCESS) {
tfree(pExprs);
return NULL;
}
type = TSDB_DATA_TYPE_DOUBLE;
bytes = tDataTypeDesc[type].nSize;
} else { // parse the normal column
int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].pBase);
assert(j < pQueryMsg->numOfCols);
SColumnInfo* pCol = &pQueryMsg->colList[j];
type = pCol->type;
bytes = pCol->bytes;
}
}
int32_t param = pExprs[i].pBase.arg[0].argValue.i64;
if (getResultDataInfo(type, bytes, pExprs[i].pBase.functionId, param, &pExprs[i].resType, &pExprs[i].resBytes,
&pExprs[i].interResBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) {
*code = TSDB_CODE_INVALID_QUERY_MSG;
return NULL;
}
if (pExprs[i].pBase.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].pBase.functionId == TSDB_FUNC_TS_DUMMY) {
tagLen += pExprs[i].resBytes;
}
assert(isValidDataType(pExprs[i].resType, pExprs[i].resBytes));
}
// get the correct result size for top/bottom queries, according to the number of tag columns in the selection clause
// TODO refactor
for(int32_t i = 0; i < pQueryMsg->numOfOutputCols; ++i) {
pExprs[i].pBase = *((SSqlFuncExprMsg**)pQueryMsg->pSqlFuncExprs)[i];
int16_t functId = pExprs[i].pBase.functionId;
if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) {
int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].pBase);
assert(j < pQueryMsg->numOfCols);
SColumnInfo* pCol = &pQueryMsg->colList[j];
int16_t type = pCol->type;
int16_t bytes = pCol->bytes;
int32_t ret = getResultDataInfo(type, bytes, pExprs[i].pBase.functionId, pExprs[i].pBase.arg[0].argValue.i64,
&pExprs[i].resType, &pExprs[i].resBytes, &pExprs[i].interResBytes, tagLen, isSuperTable);
assert(ret == TSDB_CODE_SUCCESS);
}
}
tfree(pQueryMsg->pSqlFuncExprs);
return pExprs;
}
bool vnodeIsValidVnodeCfg(SVnodeCfg* pCfg) {
if (pCfg == NULL) return false;
if (pCfg->maxSessions <= 0 || pCfg->cacheBlockSize <= 0 || pCfg->replications <= 0 || pCfg->replications > 20 ||
pCfg->daysPerFile <= 0 || pCfg->daysToKeep <= 0) {
return false;
}
return true;
}
/**
* compare if schema of two tables are identical.
* when multi-table query is issued, the schemas of all requested tables
* should be identical. Otherwise,query process will abort.
*/
bool vnodeMeterSchemaIdentical(SColumn* pSchema1, int32_t numOfCols1, SColumn* pSchema2, int32_t numOfCols2) {
if (!VALIDNUMOFCOLS(numOfCols1) || !VALIDNUMOFCOLS(numOfCols2) || numOfCols1 != numOfCols2) {
return false;
}
return memcmp((char*)pSchema1, (char*)pSchema2, sizeof(SColumn) * numOfCols1) == 0;
}
void vnodeFreeFields(SQuery* pQuery) {
if (pQuery == NULL || pQuery->pFields == NULL) {
return;
}
for (int32_t i = 0; i < pQuery->numOfBlocks; ++i) {
tfree(pQuery->pFields[i]);
}
/*
* the pQuery->pFields pointer array itself does not need to be released; it is allocated at the tail of
* pBlock, so free(pBlock) releases that memory at the same time.
*/
pQuery->pFields = NULL;
pQuery->numOfBlocks = 0;
}
void vnodeUpdateFilterColumnIndex(SQuery* pQuery) {
for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) {
for (int16_t j = 0; j < pQuery->numOfCols; ++j) {
if (pQuery->pFilterInfo[i].info.data.colId == pQuery->colList[j].data.colId) {
pQuery->pFilterInfo[i].info.colIdx = pQuery->colList[j].colIdx;
pQuery->pFilterInfo[i].info.colIdxInBuf = pQuery->colList[j].colIdxInBuf;
// the supplementary scan also requires this column
pQuery->colList[j].req[1] = 1;
break;
}
}
}
// set the column index in buffer for arithmetic operation
if (pQuery->pSelectExpr == NULL) {
return;
}
for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) {
SSqlBinaryExprInfo* pBinExprInfo = &pQuery->pSelectExpr[i].pBinExprInfo;
if (pBinExprInfo->pBinExpr == NULL) {
continue;
}
for (int16_t j = 0; j < pBinExprInfo->numOfCols; ++j) {
for (int32_t k = 0; k < pQuery->numOfCols; ++k) {
if (pBinExprInfo->pReqColumns[j].colId == pQuery->colList[k].data.colId) {
pBinExprInfo->pReqColumns[j].colIdxInBuf = pQuery->colList[k].colIdxInBuf;
assert(pQuery->colList[k].colIdxInBuf == k);
break;
}
}
}
}
}
// TODO support k<12 and k<>9
int32_t vnodeCreateFilterInfo(void* pQInfo, SQuery* pQuery) {
for (int32_t i = 0; i < pQuery->numOfCols; ++i) {
if (pQuery->colList[i].data.numOfFilters > 0) {
pQuery->numOfFilterCols++;
}
}
if (pQuery->numOfFilterCols == 0) {
return TSDB_CODE_SUCCESS;
}
pQuery->pFilterInfo = calloc(1, sizeof(SSingleColumnFilterInfo) * pQuery->numOfFilterCols);
for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) {
if (pQuery->colList[i].data.numOfFilters > 0) {
SSingleColumnFilterInfo* pFilterInfo = &pQuery->pFilterInfo[j];
memcpy(&pFilterInfo->info, &pQuery->colList[i], sizeof(SColumnInfoEx));
pFilterInfo->info.data.filters = NULL;
pFilterInfo->numOfFilters = pQuery->colList[i].data.numOfFilters;
pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem));
for(int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) {
SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f];
pSingleColFilter->filterInfo = pQuery->colList[i].data.filters[f];
int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr;
int32_t upper = pSingleColFilter->filterInfo.upperRelOptr;
if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) {
dError("QInfo:%p invalid filter info", pQInfo);
return TSDB_CODE_INVALID_QUERY_MSG;
}
int16_t type = pQuery->colList[i].data.type;
int16_t bytes = pQuery->colList[i].data.bytes;
__filter_func_t *rangeFilterArray = vnodeGetRangeFilterFuncArray(type);
__filter_func_t *filterArray = vnodeGetValueFilterFuncArray(type);
if (rangeFilterArray == NULL && filterArray == NULL) {
dError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type);
return TSDB_CODE_INVALID_QUERY_MSG;
}
if ((lower == TSDB_RELATION_LARGE_EQUAL || lower == TSDB_RELATION_LARGE) &&
(upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) {
if (lower == TSDB_RELATION_LARGE_EQUAL) {
if (upper == TSDB_RELATION_LESS_EQUAL) {
pSingleColFilter->fp = rangeFilterArray[4];
} else {
pSingleColFilter->fp = rangeFilterArray[2];
}
} else {
if (upper == TSDB_RELATION_LESS_EQUAL) {
pSingleColFilter->fp = rangeFilterArray[3];
} else {
pSingleColFilter->fp = rangeFilterArray[1];
}
}
} else { // set callback filter function
if (lower != TSDB_RELATION_INVALID) {
pSingleColFilter->fp = filterArray[lower];
if (upper != TSDB_RELATION_INVALID) {
dError("pQInfo:%p failed to get filter function, invalid filter condition", pQInfo, type);
return TSDB_CODE_INVALID_QUERY_MSG;
}
} else {
pSingleColFilter->fp = filterArray[upper];
}
}
assert (pSingleColFilter->fp != NULL);
pSingleColFilter->bytes = bytes;
}
j++;
}
}
return TSDB_CODE_SUCCESS;
}
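/*
 * Illustrative sketch (not part of the original source): the index selection for
 * rangeFilterArray as implemented by the branches above. The mapping -- 1 for
 * (lower, upper), 2 for [lower, upper), 3 for (lower, upper], 4 for [lower, upper]
 * -- is inferred from vnodeCreateFilterInfo and is an assumption, not a documented
 * contract of vnodeGetRangeFilterFuncArray().
 */
static int32_t UNUSED_FUNC exampleRangeFilterIndex(int32_t lowerOptr, int32_t upperOptr) {
  int32_t lowerClosed = (lowerOptr == TSDB_RELATION_LARGE_EQUAL);
  int32_t upperClosed = (upperOptr == TSDB_RELATION_LESS_EQUAL);
  if (lowerClosed && upperClosed) return 4;
  if (lowerClosed)                return 2;
  if (upperClosed)                return 3;
  return 1;
}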
bool vnodeDoFilterData(SQuery* pQuery, int32_t elemPos) {
for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) {
SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k];
char* pElem = pFilterInfo->pData + pFilterInfo->info.data.bytes * elemPos;
if(isNull(pElem, pFilterInfo->info.data.type)) {
return false;
}
int32_t num = pFilterInfo->numOfFilters;
bool qualified = false;
for(int32_t j = 0; j < num; ++j) {
SColumnFilterElem* pFilterElem = &pFilterInfo->pFilters[j];
if (pFilterElem->fp(pFilterElem, pElem, pElem)) {
qualified = true;
break;
}
}
if (!qualified) {
return false;
}
}
return true;
}
bool vnodeFilterData(SQuery* pQuery, int32_t* numOfActualRead, int32_t index) {
(*numOfActualRead)++;
if (!vnodeDoFilterData(pQuery, index)) {
return false;
}
if (pQuery->limit.offset > 0) {
pQuery->limit.offset--; // ignore this qualified row
return false;
}
return true;
}
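/*
 * Illustrative sketch (not part of the original source): the evaluation order of
 * vnodeDoFilterData/vnodeFilterData reduced to plain int predicates -- the several
 * filters on one column are ORed, column results are ANDed by the caller, and a
 * qualified row first consumes the remaining LIMIT offset. All names are hypothetical.
 */
typedef bool (*example_pred_t)(int value);

static bool UNUSED_FUNC exampleColumnQualifies(int value, example_pred_t const *preds, int numOfPreds,
                                               int64_t *remainingOffset) {
  bool qualified = false;
  for (int j = 0; j < numOfPreds; ++j) {
    if (preds[j](value)) {      // OR within one column's filter list
      qualified = true;
      break;
    }
  }
  if (!qualified) {
    return false;               // the caller ANDs this result across all filter columns
  }
  if (*remainingOffset > 0) {
    (*remainingOffset)--;       // qualified row is swallowed by the LIMIT offset
    return false;
  }
  return true;
}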
bool vnodeIsProjectionQuery(SSqlFunctionExpr* pExpr, int32_t numOfOutput) {
for (int32_t i = 0; i < numOfOutput; ++i) {
if (pExpr[i].pBase.functionId != TSDB_FUNC_PRJ) {
return false;
}
}
return true;
}
/*
* the pTable->state may be changed by vnodeIsSafeToDeleteMeter and by the import/update processors, so the
* check of the state will not always be correct.
*
* Import/update/delete operations are actually blocked by the query currently being processed once the check
* of the meter state has passed, but later queries are denied.
*
* 1. vnodeIsSafeToDelete will wait for this to complete, since it also uses the vmutex to check numOfQueries
* 2. import will check numOfQueries again after setting the state to TSDB_METER_STATE_IMPORTING, while the
*    vmutex is also held.
* 3. insert has nothing to do with the query processing.
*/
int32_t vnodeIncQueryRefCount(SQueryMeterMsg* pQueryMsg, SMeterSidExtInfo** pSids, SMeterObj** pMeterObjList,
int32_t* numOfIncTables) {
SVnodeObj* pVnode = &vnodeList[pQueryMsg->vnode];
int32_t num = 0;
int32_t index = 0;
int32_t code = TSDB_CODE_SUCCESS;
for (int32_t i = 0; i < pQueryMsg->numOfSids; ++i) {
SMeterObj* pTable = pVnode->meterList[pSids[i]->sid];
/*
* If the table is missing or in dropping status, request its configuration from the management node and
* ignore it during query processing. The error code TSDB_CODE_NOT_ACTIVE_TABLE is never returned to the client.
* The missing table needs to be removed from the pSids list.
*/
if (pTable == NULL || vnodeIsMeterState(pTable, TSDB_METER_STATE_DROPPING)) {
dWarn("qmsg:%p, vid:%d sid:%d, not there or will be dropped, ignore this table in query", pQueryMsg,
pQueryMsg->vnode, pSids[i]->sid);
vnodeSendMeterCfgMsg(pQueryMsg->vnode, pSids[i]->sid);
continue;
} else if (pTable->uid != pSids[i]->uid || pTable->sid != pSids[i]->sid) {
code = TSDB_CODE_TABLE_ID_MISMATCH;
dError("qmsg:%p, vid:%d sid:%d id:%s uid:%" PRIu64 ", id mismatch. sid:%d uid:%" PRId64 " in msg", pQueryMsg,
pQueryMsg->vnode, pTable->sid, pTable->meterId, pTable->uid, pSids[i]->sid, pSids[i]->uid);
vnodeSendMeterCfgMsg(pQueryMsg->vnode, pSids[i]->sid);
continue;
} else if (pTable->state > TSDB_METER_STATE_INSERTING) { //update or import
code = TSDB_CODE_ACTION_IN_PROGRESS;
dTrace("qmsg:%p, vid:%d sid:%d id:%s, it is in state:%s, wait!", pQueryMsg, pQueryMsg->vnode, pSids[i]->sid,
pTable->meterId, taosGetTableStatusStr(pTable->state));
continue;
}
/*
* vnodeIsSafeToDeleteMeter will wait for this function to complete, and only then
* check whether numOfQueries is 0.
*/
pMeterObjList[(*numOfIncTables)++] = pTable;
atomic_fetch_add_32(&pTable->numOfQueries, 1);
pSids[index++] = pSids[i];
// log the meters on which more than one query is executing
if (pTable->numOfQueries > 1) {
dTrace("qmsg:%p, vid:%d sid:%d id:%s, inc query ref, numOfQueries:%d", pQueryMsg, pTable->vnode, pTable->sid,
pTable->meterId, pTable->numOfQueries);
num++;
}
}
dTrace("qmsg:%p, query meters: %d, inc query ref %d, numOfQueries on %d meters are 1, queried meters:%d after "
"filter missing meters", pQueryMsg, pQueryMsg->numOfSids, *numOfIncTables, (*numOfIncTables) - num, index);
assert(pQueryMsg->numOfSids >= (*numOfIncTables) && pQueryMsg->numOfSids >= index);
pQueryMsg->numOfSids = index;
return code;
}
void vnodeDecQueryRefCount(SQueryMeterMsg* pQueryMsg, SMeterObj** pMeterObjList, int32_t numOfIncTables) {
int32_t num = 0;
for (int32_t i = 0; i < numOfIncTables; ++i) {
SMeterObj* pTable = pMeterObjList[i];
if (pTable != NULL) { // here, do not need to lock to perform operations
atomic_fetch_sub_32(&pTable->numOfQueries, 1);
if (pTable->numOfQueries > 0) {
dTrace("qmsg:%p, vid:%d sid:%d id:%s dec query ref, numOfQueries:%d", pQueryMsg, pTable->vnode, pTable->sid,
pTable->meterId, pTable->numOfQueries);
num++;
}
}
}
dTrace("qmsg:%p, dec query ref for %d meters, numOfQueries on %d meters are 0", pQueryMsg, numOfIncTables, numOfIncTables - num);
}
void vnodeUpdateQueryColumnIndex(SQuery* pQuery, SMeterObj* pMeterObj) {
if (pQuery == NULL || pMeterObj == NULL) {
return;
}
int32_t i = 0, j = 0;
while (i < pQuery->numOfCols && j < pMeterObj->numOfColumns) {
if (pQuery->colList[i].data.colId == pMeterObj->schema[j].colId) {
pQuery->colList[i++].colIdx = (int16_t)j++;
} else if (pQuery->colList[i].data.colId < pMeterObj->schema[j].colId) {
pQuery->colList[i++].colIdx = -1;
} else if (pQuery->colList[i].data.colId > pMeterObj->schema[j].colId) {
j++;
}
}
while (i < pQuery->numOfCols) {
pQuery->colList[i++].colIdx = -1; // no such column in the current meter
}
// sql expression has not been created yet
if (pQuery->pSelectExpr == NULL) {
return;
}
for(int32_t k = 0; k < pQuery->numOfOutputCols; ++k) {
SSqlFuncExprMsg* pSqlExprMsg = &pQuery->pSelectExpr[k].pBase;
if (pSqlExprMsg->functionId == TSDB_FUNC_ARITHM || pSqlExprMsg->colInfo.flag == TSDB_COL_TAG) {
continue;
}
SColIndexEx* pColIndexEx = &pSqlExprMsg->colInfo;
for(int32_t f = 0; f < pQuery->numOfCols; ++f) {
if (pColIndexEx->colId == pQuery->colList[f].data.colId) {
pColIndexEx->colIdx = pQuery->colList[f].colIdx;
break;
}
}
}
}
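/*
 * Illustrative sketch (not part of the original source): the two-pointer id mapping
 * used above, reduced to two ascending int id lists. Ids requested by the query but
 * absent from the schema get index -1, just like colIdx above. Hypothetical names.
 */
static void UNUSED_FUNC exampleMapColumnIds(const int *queryIds, int nq, const int *schemaIds, int ns, int *outIdx) {
  int i = 0, j = 0;
  while (i < nq && j < ns) {
    if (queryIds[i] == schemaIds[j]) {
      outIdx[i++] = j++;        // exact match: record the schema position
    } else if (queryIds[i] < schemaIds[j]) {
      outIdx[i++] = -1;         // column not present in this meter
    } else {
      j++;                      // schema column not requested by the query
    }
  }
  while (i < nq) {
    outIdx[i++] = -1;           // remaining query columns have no match
  }
}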
int32_t vnodeSetMeterState(SMeterObj* pMeterObj, int32_t state) {
return atomic_val_compare_exchange_32(&pMeterObj->state, TSDB_METER_STATE_READY, state);
}
void vnodeClearMeterState(SMeterObj* pMeterObj, int32_t state) {
pMeterObj->state &= (~state);
}
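/*
 * Illustrative sketch (not part of the original source): a typical caller of the
 * compare-and-swap guard above. The transition only succeeds while the table is
 * still in TSDB_METER_STATE_READY; otherwise the previous state is returned and
 * the caller backs off, as vnodeSetMeterInsertImportStateEx below also does.
 * exampleBeginInsert is a hypothetical name.
 */
static int32_t UNUSED_FUNC exampleBeginInsert(SMeterObj *pObj) {
  int32_t prev = vnodeSetMeterState(pObj, TSDB_METER_STATE_INSERTING);
  if (prev != TSDB_METER_STATE_READY) {
    return TSDB_CODE_ACTION_IN_PROGRESS;  // another writer or the dropper owns the table
  }
  // ... perform the insert, then release the state ...
  vnodeClearMeterState(pObj, TSDB_METER_STATE_INSERTING);
  return TSDB_CODE_SUCCESS;
}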
bool vnodeIsMeterState(SMeterObj* pMeterObj, int32_t state) {
if (state == TSDB_METER_STATE_READY) {
return pMeterObj->state == TSDB_METER_STATE_READY;
} else if (state == TSDB_METER_STATE_DROPPING) {
return pMeterObj->state >= state;
} else {
return (((pMeterObj->state) & state) == state);
}
}
void vnodeSetMeterDeleting(SMeterObj* pMeterObj) {
if (pMeterObj == NULL) {
return;
}
pMeterObj->state |= TSDB_METER_STATE_DROPPING;
}
int32_t vnodeSetMeterInsertImportStateEx(SMeterObj* pObj, int32_t st) {
int32_t code = TSDB_CODE_SUCCESS;
int32_t state = vnodeSetMeterState(pObj, st);
if (state != TSDB_METER_STATE_READY) {//return to denote import is not performed
if (vnodeIsMeterState(pObj, TSDB_METER_STATE_DROPPING)) {
dTrace("vid:%d sid:%d id:%s, meter is deleted, state:%d", pObj->vnode, pObj->sid, pObj->meterId,
pObj->state);
code = TSDB_CODE_NOT_ACTIVE_TABLE;
} else {// waiting for 300ms by default and try again
dTrace("vid:%d sid:%d id:%s, try submit again since in state:%d", pObj->vnode, pObj->sid,
pObj->meterId, pObj->state);
code = TSDB_CODE_ACTION_IN_PROGRESS;
}
}
return code;
}
bool vnodeIsSafeToDeleteMeter(SVnodeObj* pVnode, int32_t sid) {
SMeterObj* pObj = pVnode->meterList[sid];
if (pObj == NULL || vnodeIsMeterState(pObj, TSDB_METER_STATE_DROPPED)) {
return true;
}
int32_t prev = vnodeSetMeterState(pObj, TSDB_METER_STATE_DROPPING);
/*
* if the meter is not in ready/deleting state, it must be in insert/import/update,
* set the deleting state and wait for that procedure to complete
*/
if (prev != TSDB_METER_STATE_READY && prev < TSDB_METER_STATE_DROPPING) {
vnodeSetMeterDeleting(pObj);
dWarn("vid:%d sid:%d id:%s, can not be deleted, state:%d, wait", pObj->vnode, pObj->sid, pObj->meterId, prev);
return false;
}
bool ready = true;
/*
* the query will be stopped ASAP, since the state of meter is set to TSDB_METER_STATE_DROPPING,
* and new queries will abort since the meter is being deleted.
*/
pthread_mutex_lock(&pVnode->vmutex);
if (pObj->numOfQueries > 0) {
dWarn("vid:%d sid:%d id:%s %d queries executing on it, wait query to be finished",
pObj->vnode, pObj->sid, pObj->meterId, pObj->numOfQueries);
ready = false;
}
pthread_mutex_unlock(&pVnode->vmutex);
return ready;
}
void vnodeFreeColumnInfo(SColumnInfo* pColumnInfo) {
if (pColumnInfo == NULL) {
return;
}
if (pColumnInfo->numOfFilters > 0) {
if (pColumnInfo->type == TSDB_DATA_TYPE_BINARY) {
for (int32_t i = 0; i < pColumnInfo->numOfFilters; ++i) {
tfree(pColumnInfo->filters[i].pz);
pColumnInfo->filters[i].len = 0;
}
}
tfree(pColumnInfo->filters);
}
}