提交 b83f8957 编写于 作者: H Haojun Liao

refactor: do some internal refactor.

上级 ea83ae23
...@@ -123,14 +123,20 @@ ELSE () ...@@ -123,14 +123,20 @@ ELSE ()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wno-literal-suffix -Werror=return-type -fPIC -gdwarf-2 -g3 -Wformat=2 -Wno-format-nonliteral -Wno-format-truncation -Wno-format-y2k") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wno-literal-suffix -Werror=return-type -fPIC -gdwarf-2 -g3 -Wformat=2 -Wno-format-nonliteral -Wno-format-truncation -Wno-format-y2k")
ENDIF () ENDIF ()
MESSAGE("System processor ID: ${CMAKE_SYSTEM_PROCESSOR}")
IF (TD_INTEL_64 OR TD_INTEL_32) IF (TD_INTEL_64 OR TD_INTEL_32)
ADD_DEFINITIONS("-msse4.2 -mavx -mavx2") ADD_DEFINITIONS("-msse4.2")
IF("${FMA_SUPPORT}" MATCHES "true") IF("${FMA_SUPPORT}" MATCHES "true")
MESSAGE(STATUS "turn fma function support on") MESSAGE(STATUS "fma function supported")
ADD_DEFINITIONS("-mfma") ADD_DEFINITIONS("-mfma")
ELSE () ELSE ()
MESSAGE(STATUS "turn fma function support off") MESSAGE(STATUS "fma function NOT supported")
ENDIF()
IF("${SIMD_SUPPORT}" MATCHES "true")
ADD_DEFINITIONS("-mavx -mavx2")
MESSAGE(STATUS "cpu simd instruction AVX/AVX2 supported")
ELSE()
MESSAGE(STATUS "cpu simd instruction AVX/AVX2 NOT supported")
ENDIF() ENDIF()
ENDIF () ENDIF ()
......
cmake_minimum_required(VERSION 3.0) cmake_minimum_required(VERSION 3.0)
MESSAGE("Current system is ${CMAKE_SYSTEM_NAME}")
# init # init
SET(TD_LINUX FALSE) SET(TD_LINUX FALSE)
SET(TD_WINDOWS FALSE) SET(TD_WINDOWS FALSE)
SET(TD_DARWIN FALSE) SET(TD_DARWIN FALSE)
MESSAGE("Compiler ID: ${CMAKE_CXX_COMPILER_ID}")
if(CMAKE_COMPILER_IS_GNUCXX MATCHES 1) if(CMAKE_COMPILER_IS_GNUCXX MATCHES 1)
set(CXX_COMPILER_IS_GNU TRUE) set(CXX_COMPILER_IS_GNU TRUE)
else() else()
set(CXX_COMPILER_IS_GNU FALSE) set(CXX_COMPILER_IS_GNU FALSE)
endif() endif()
MESSAGE("Current system name is ${CMAKE_SYSTEM_NAME}.") MESSAGE("Current system: ${CMAKE_SYSTEM_NAME}")
IF (${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") IF (${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
...@@ -26,6 +23,8 @@ IF (${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "Darwin ...@@ -26,6 +23,8 @@ IF (${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "Darwin
set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS "${CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS} -undefined dynamic_lookup") set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS "${CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS} -undefined dynamic_lookup")
ENDIF () ENDIF ()
MESSAGE("Current system processor: ${CMAKE_SYSTEM_PROCESSOR}")
IF (${CMAKE_SYSTEM_NAME} MATCHES "Linux") IF (${CMAKE_SYSTEM_NAME} MATCHES "Linux")
SET(TD_LINUX TRUE) SET(TD_LINUX TRUE)
...@@ -44,7 +43,6 @@ IF (${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "Darwin ...@@ -44,7 +43,6 @@ IF (${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "Darwin
SET(OSTYPE "macOS") SET(OSTYPE "macOS")
ADD_DEFINITIONS("-DDARWIN -Wno-tautological-pointer-compare") ADD_DEFINITIONS("-DDARWIN -Wno-tautological-pointer-compare")
MESSAGE("Current system processor is ${CMAKE_SYSTEM_PROCESSOR}.")
IF (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm64") IF (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm64")
MESSAGE("Current system arch is arm64") MESSAGE("Current system arch is arm64")
SET(TD_DARWIN_64 TRUE) SET(TD_DARWIN_64 TRUE)
...@@ -80,24 +78,22 @@ ELSEIF (${CMAKE_SYSTEM_NAME} MATCHES "Windows") ...@@ -80,24 +78,22 @@ ELSEIF (${CMAKE_SYSTEM_NAME} MATCHES "Windows")
ENDIF() ENDIF()
IF ("${CPUTYPE}" STREQUAL "") IF ("${CPUTYPE}" STREQUAL "")
MESSAGE(STATUS "The current platform " ${CMAKE_SYSTEM_PROCESSOR} " is detected")
IF (CMAKE_SYSTEM_PROCESSOR MATCHES "(amd64)|(AMD64)") IF (CMAKE_SYSTEM_PROCESSOR MATCHES "(amd64)|(AMD64)")
MESSAGE(STATUS "The current platform is amd64") MESSAGE(STATUS "Current platform is amd64")
SET(PLATFORM_ARCH_STR "amd64") SET(PLATFORM_ARCH_STR "amd64")
SET(TD_INTEL_64 TRUE) SET(TD_INTEL_64 TRUE)
ELSEIF (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)") ELSEIF (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)")
MESSAGE(STATUS "The current platform is x86") MESSAGE(STATUS "Current platform is x86")
SET(PLATFORM_ARCH_STR "i386") SET(PLATFORM_ARCH_STR "i386")
SET(TD_INTEL_32 TRUE) SET(TD_INTEL_32 TRUE)
ELSEIF (CMAKE_SYSTEM_PROCESSOR MATCHES "armv7l") ELSEIF (CMAKE_SYSTEM_PROCESSOR MATCHES "armv7l")
MESSAGE(STATUS "The current platform is aarch32") MESSAGE(STATUS "Current platform is aarch32")
SET(PLATFORM_ARCH_STR "arm") SET(PLATFORM_ARCH_STR "arm")
SET(TD_ARM_32 TRUE) SET(TD_ARM_32 TRUE)
ADD_DEFINITIONS("-D_TD_ARM_") ADD_DEFINITIONS("-D_TD_ARM_")
ADD_DEFINITIONS("-D_TD_ARM_32") ADD_DEFINITIONS("-D_TD_ARM_32")
ELSEIF (CMAKE_SYSTEM_PROCESSOR MATCHES "(aarch64)|(arm64)") ELSEIF (CMAKE_SYSTEM_PROCESSOR MATCHES "(aarch64)|(arm64)")
MESSAGE(STATUS "The current platform is aarch64") MESSAGE(STATUS "Current platform is aarch64")
SET(PLATFORM_ARCH_STR "arm64") SET(PLATFORM_ARCH_STR "arm64")
SET(TD_ARM_64 TRUE) SET(TD_ARM_64 TRUE)
ADD_DEFINITIONS("-D_TD_ARM_") ADD_DEFINITIONS("-D_TD_ARM_")
......
...@@ -26,7 +26,7 @@ ELSEIF (HAVE_GIT) ...@@ -26,7 +26,7 @@ ELSEIF (HAVE_GIT)
SET(TD_VER_GIT "no git commit id") SET(TD_VER_GIT "no git commit id")
ENDIF () ENDIF ()
ELSE () ELSE ()
message(STATUS "no git cmd") message(STATUS "no git found")
SET(TD_VER_GIT "no git commit id") SET(TD_VER_GIT "no git commit id")
ENDIF () ENDIF ()
...@@ -70,9 +70,9 @@ MESSAGE(STATUS "compatible: " ${TD_VER_COMPATIBLE}) ...@@ -70,9 +70,9 @@ MESSAGE(STATUS "compatible: " ${TD_VER_COMPATIBLE})
MESSAGE(STATUS "commit id: " ${TD_VER_GIT}) MESSAGE(STATUS "commit id: " ${TD_VER_GIT})
MESSAGE(STATUS "build date: " ${TD_VER_DATE}) MESSAGE(STATUS "build date: " ${TD_VER_DATE})
MESSAGE(STATUS "build type: " ${CMAKE_BUILD_TYPE}) MESSAGE(STATUS "build type: " ${CMAKE_BUILD_TYPE})
MESSAGE(STATUS "type: " ${TD_VER_VERTYPE}) MESSAGE(STATUS "type: " ${TD_VER_VERTYPE})
MESSAGE(STATUS "cpu: " ${TD_VER_CPUTYPE}) MESSAGE(STATUS "cpu: " ${TD_VER_CPUTYPE})
MESSAGE(STATUS "os: " ${TD_VER_OSTYPE}) MESSAGE(STATUS "os: " ${TD_VER_OSTYPE})
MESSAGE(STATUS "============= compile version parameter information end ============= ") MESSAGE(STATUS "============= compile version parameter information end ============= ")
STRING(REPLACE "." "_" TD_LIB_VER_NUMBER ${TD_VER_NUMBER}) STRING(REPLACE "." "_" TD_LIB_VER_NUMBER ${TD_VER_NUMBER})
...@@ -81,6 +81,13 @@ extern "C" { ...@@ -81,6 +81,13 @@ extern "C" {
#include <string.h> #include <string.h>
#include <wchar.h> #include <wchar.h>
#include <wctype.h> #include <wctype.h>
#include <cpuid.h>
#if __AVX__
#include <immintrin.h>
#elif __SSE4_2__
#include <nmmintrin.h>
#endif
#include "osThread.h" #include "osThread.h"
......
...@@ -168,22 +168,22 @@ void syslog(int unused, const char *format, ...); ...@@ -168,22 +168,22 @@ void syslog(int unused, const char *format, ...);
} \ } \
} while (0) } while (0)
#define DEFAULT_DOUBLE_COMP(x, y) \ #define DEFAULT_DOUBLE_COMP(x, y) \
do { \ do { \
if (isnan(x) && isnan(y)) { \ if (isnan(x) && isnan(y)) { \
return 0; \ return 0; \
} \ } \
if (isnan(x)) { \ if (isnan(x)) { \
return -1; \ return -1; \
} \ } \
if (isnan(y)) { \ if (isnan(y)) { \
return 1; \ return 1; \
} \ } \
if ((x) == (y)) { \ if (fabs((x) - (y)) <= DBL_EPSILON) { \
return 0; \ return 0; \
} else { \ } else { \
return (x) < (y) ? -1 : 1; \ return (x) < (y) ? -1 : 1; \
} \ } \
} while (0) } while (0)
#define DEFAULT_FLOAT_COMP(x, y) DEFAULT_DOUBLE_COMP(x, y) #define DEFAULT_FLOAT_COMP(x, y) DEFAULT_DOUBLE_COMP(x, y)
......
...@@ -36,6 +36,11 @@ extern int64_t tsStreamMax; ...@@ -36,6 +36,11 @@ extern int64_t tsStreamMax;
extern float tsNumOfCores; extern float tsNumOfCores;
extern int64_t tsTotalMemoryKB; extern int64_t tsTotalMemoryKB;
extern char *tsProcPath; extern char *tsProcPath;
extern char tsSIMDEnable;
extern char tsSSE42Enable;
extern char tsAVXEnable;
extern char tsAVX2Enable;
extern char tsFMAEnable;
extern char configDir[]; extern char configDir[];
extern char tsDataDir[]; extern char tsDataDir[];
......
...@@ -40,6 +40,7 @@ int32_t taosGetOsReleaseName(char *releaseName, int32_t maxLen); ...@@ -40,6 +40,7 @@ int32_t taosGetOsReleaseName(char *releaseName, int32_t maxLen);
int32_t taosGetCpuInfo(char *cpuModel, int32_t maxLen, float *numOfCores); int32_t taosGetCpuInfo(char *cpuModel, int32_t maxLen, float *numOfCores);
int32_t taosGetCpuCores(float *numOfCores); int32_t taosGetCpuCores(float *numOfCores);
void taosGetCpuUsage(double *cpu_system, double *cpu_engine); void taosGetCpuUsage(double *cpu_system, double *cpu_engine);
int32_t taosGetCpuInstructions(char* sse42, char* avx, char* avx2, char* fma);
int32_t taosGetTotalMemory(int64_t *totalKB); int32_t taosGetTotalMemory(int64_t *totalKB);
int32_t taosGetProcMemory(int64_t *usedKB); int32_t taosGetProcMemory(int64_t *usedKB);
int32_t taosGetSysMemory(int64_t *usedKB); int32_t taosGetSysMemory(int64_t *usedKB);
......
...@@ -15,7 +15,6 @@ ...@@ -15,7 +15,6 @@
#define _DEFAULT_SOURCE #define _DEFAULT_SOURCE
#include "tglobal.h" #include "tglobal.h"
#include "tcompare.h"
#include "tconfig.h" #include "tconfig.h"
#include "tdatablock.h" #include "tdatablock.h"
#include "tgrant.h" #include "tgrant.h"
...@@ -312,7 +311,14 @@ static int32_t taosAddSystemCfg(SConfig *pCfg) { ...@@ -312,7 +311,14 @@ static int32_t taosAddSystemCfg(SConfig *pCfg) {
if (cfgAddLocale(pCfg, "locale", tsLocale) != 0) return -1; if (cfgAddLocale(pCfg, "locale", tsLocale) != 0) return -1;
if (cfgAddCharset(pCfg, "charset", tsCharset) != 0) return -1; if (cfgAddCharset(pCfg, "charset", tsCharset) != 0) return -1;
if (cfgAddBool(pCfg, "enableCoreFile", 1, 1) != 0) return -1; if (cfgAddBool(pCfg, "enableCoreFile", 1, 1) != 0) return -1;
if (cfgAddFloat(pCfg, "numOfCores", tsNumOfCores, 0, 100000, 1) != 0) return -1; if (cfgAddFloat(pCfg, "numOfCores", tsNumOfCores, 1, 100000, 1) != 0) return -1;
if (cfgAddBool(pCfg, "SSE42", tsSSE42Enable, 0) != 0) return -1;
if (cfgAddBool(pCfg, "AVX", tsAVXEnable, 0) != 0) return -1;
if (cfgAddBool(pCfg, "AVX2", tsAVX2Enable, 0) != 0) return -1;
if (cfgAddBool(pCfg, "FMA", tsFMAEnable, 0) != 0) return -1;
if (cfgAddBool(pCfg, "SIMD-Supported", tsSIMDEnable, 0) != 0) return -1;
if (cfgAddInt64(pCfg, "openMax", tsOpenMax, 0, INT64_MAX, 1) != 0) return -1; if (cfgAddInt64(pCfg, "openMax", tsOpenMax, 0, INT64_MAX, 1) != 0) return -1;
if (cfgAddInt64(pCfg, "streamMax", tsStreamMax, 0, INT64_MAX, 1) != 0) return -1; if (cfgAddInt64(pCfg, "streamMax", tsStreamMax, 0, INT64_MAX, 1) != 0) return -1;
if (cfgAddInt32(pCfg, "pageSizeKB", tsPageSizeKB, 0, INT64_MAX, 1) != 0) return -1; if (cfgAddInt32(pCfg, "pageSizeKB", tsPageSizeKB, 0, INT64_MAX, 1) != 0) return -1;
......
...@@ -13,7 +13,6 @@ ...@@ -13,7 +13,6 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include <immintrin.h>
#include "builtinsimpl.h" #include "builtinsimpl.h"
#include "function.h" #include "function.h"
#include "tdatablock.h" #include "tdatablock.h"
...@@ -49,11 +48,48 @@ typedef struct SAvgRes { ...@@ -49,11 +48,48 @@ typedef struct SAvgRes {
int16_t type; // store the original input type, used in merge function int16_t type; // store the original input type, used in merge function
} SAvgRes; } SAvgRes;
/**
 * Accumulate a slice of a float column into pRes->sum.dsum.
 *
 * The rows summed are [pInput->startRowIndex, pInput->startRowIndex + pInput->numOfRows),
 * i.e. the same range the scalar loops in handleFloatCols() walk over. The caller is
 * expected to invoke this only for columns without NULL values, since no null bitmap is
 * consulted here.
 *
 * @param pInput  supplies startRowIndex and numOfRows of the slice to process
 * @param plist   base address of the float column data (row 0)
 * @param pRes    result buffer; only pRes->sum.dsum is updated
 */
static void floatVectorSumAVX(const SInputColumnInfoData* pInput, const float* plist, SAvgRes* pRes) {
  // honor the row offset of the input slice instead of always starting at row 0
  const float*  p = plist + pInput->startRowIndex;
  const int32_t numOfRows = pInput->numOfRows;

#if __AVX__
  const int32_t width = 8;  // number of floats held by one 256-bit register
  const int32_t rounds = numOfRows / width;
  const int32_t remain = numOfRows % width;

  __m256 sum = _mm256_setzero_ps();
  for (int32_t i = 0; i < rounds; ++i) {
    // unaligned load: no assumption is made on the alignment of the column buffer
    __m256 val = _mm256_loadu_ps(p);
    sum = _mm256_add_ps(sum, val);
    p += width;
  }

  // fold the eight partial sums kept in the register lanes into the result
  const float* q = (const float*)&sum;
  pRes->sum.dsum += q[0] + q[1] + q[2] + q[3] + q[4] + q[5] + q[6] + q[7];

  // p now points right behind the last full vector; add the (< 8) leftover items
  for (int32_t j = 0; j < remain; ++j) {
    pRes->sum.dsum += p[j];
  }
#else
  // built without AVX support: fall back to a plain loop rather than silently adding nothing
  for (int32_t j = 0; j < numOfRows; ++j) {
    pRes->sum.dsum += p[j];
  }
#endif
}
static int32_t handleFloatCols(const SColumnInfoData* pCol, const SInputColumnInfoData* pInput, SAvgRes* pRes) { static int32_t handleFloatCols(const SColumnInfoData* pCol, const SInputColumnInfoData* pInput, SAvgRes* pRes) {
int32_t numOfElems = 0; int32_t numOfElems = 0;
float* plist = (float*)pCol->pData; float* plist = (float*)pCol->pData;
if (pCol->hasNull || pInput->numOfRows < 8) { const int32_t THRESHOLD_SIZE = 8;
if (pCol->hasNull || pInput->numOfRows <= THRESHOLD_SIZE) {
for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) { for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) {
if (colDataIsNull_f(pCol->nullbitmap, i)) { if (colDataIsNull_f(pCol->nullbitmap, i)) {
continue; continue;
...@@ -67,46 +103,13 @@ static int32_t handleFloatCols(const SColumnInfoData* pCol, const SInputColumnIn ...@@ -67,46 +103,13 @@ static int32_t handleFloatCols(const SColumnInfoData* pCol, const SInputColumnIn
numOfElems = pInput->numOfRows; numOfElems = pInput->numOfRows;
pRes->count += pInput->numOfRows; pRes->count += pInput->numOfRows;
// 1. an software version to speedup the process by using loop unwinding. // 3. If the CPU supports AVX, let's employ AVX instructions to speedup this loop
if (tsAVXEnable && tsSIMDEnable) {
floatVectorSumAVX(pInput, plist, pRes);
} else {
// 2. if both the CPU and OS support SSE4.2, let's try the faster version by using SSE4.2 SIMD for (int32_t i = pInput->startRowIndex; i < pInput->numOfRows + pInput->startRowIndex; ++i) {
pRes->sum.dsum += plist[i];
}
// 3. If both the CPU and OS support AVX, let's employ AVX instruction to speedup this loop
// 3.1 find the start position that are aligned to 32bytes address in memory
int32_t startElem = 0;//((uint64_t)plist) & ((1<<8u)-1);
int32_t i = 0;
int32_t bitWidth = 8;
int32_t remain = (pInput->numOfRows - startElem) % bitWidth;
int32_t rounds = (pInput->numOfRows - startElem) / bitWidth;
const float* p = &plist[startElem];
__m256 loadVal;
__m256 sum = _mm256_setzero_ps();
for(; i < rounds; ++i) {
loadVal = _mm256_loadu_ps(p);
sum = _mm256_add_ps(sum, loadVal);
p += bitWidth;
}
// let sum up the final results
const float* q = (const float*)&sum;
pRes->sum.dsum += q[0] + q[1] + q[2] + q[3] + q[4] + q[5] + q[6] + q[7];
// calculate the front and the reminder items in array list
for(int32_t j = 0; j < startElem; ++j) {
pRes->sum.dsum += plist[j];
}
startElem += rounds * bitWidth;
for(int32_t j = 0; j < remain; ++j) {
pRes->sum.dsum += plist[j + startElem];
} }
} }
......
...@@ -13,20 +13,163 @@ ...@@ -13,20 +13,163 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include <immintrin.h>
#include "builtinsimpl.h" #include "builtinsimpl.h"
#include "function.h" #include "function.h"
#include "tdatablock.h" #include "tdatablock.h"
#include "tfunctionInt.h" #include "tfunctionInt.h"
#include "tglobal.h" #include "tglobal.h"
/**
 * Find the minimum or maximum of a contiguous int32 array.
 *
 * When compiled with AVX2 the bulk of the array is processed eight items at a time;
 * the leftover items (and the whole array when AVX2 is not compiled in, or when fewer
 * than eight items are given) are handled by a scalar loop.
 *
 * @param pData      first element to inspect; must hold at least numOfRows items
 * @param numOfRows  number of items to inspect
 * @param isMinFunc  true to compute the minimum, false to compute the maximum
 * @return the minimum/maximum value, or 0 if pData is NULL or numOfRows <= 0
 */
static int32_t i32VectorCmpAVX2(const int32_t* pData, int32_t numOfRows, bool isMinFunc) {
  if (pData == NULL || numOfRows <= 0) {
    return 0;
  }

  int32_t        v = pData[0];
  const int32_t* p = pData;        // next item that has not been inspected yet
  int32_t        remain = numOfRows;  // number of items left for the scalar loop

#if __AVX2__
  const int32_t width = 8;  // number of int32 items held by one 256-bit register
  const int32_t rounds = numOfRows / width;

  if (rounds > 0) {
    // the first full vector seeds the accumulator, so only rounds - 1 vectors remain to be
    // merged; starting the loop at 0 would read one vector past the end of the data
    __m256i acc = _mm256_loadu_si256((const __m256i*)p);
    p += width;

    if (isMinFunc) {
      for (int32_t i = 1; i < rounds; ++i) {
        __m256i next = _mm256_loadu_si256((const __m256i*)p);
        acc = _mm256_min_epi32(acc, next);
        p += width;
      }
    } else {
      for (int32_t i = 1; i < rounds; ++i) {
        __m256i next = _mm256_loadu_si256((const __m256i*)p);
        acc = _mm256_max_epi32(acc, next);
        p += width;
      }
    }

    // reduce the eight lanes of the accumulator to a single value
    int32_t lanes[8];
    _mm256_storeu_si256((__m256i*)lanes, acc);

    v = lanes[0];
    for (int32_t k = 1; k < width; ++k) {
      if (isMinFunc) {
        v = (v < lanes[k]) ? v : lanes[k];
      } else {
        v = (v > lanes[k]) ? v : lanes[k];
      }
    }

    remain = numOfRows % width;
  }
#endif

  // p already points at the first unprocessed item, so the tail is indexed from p directly
  for (int32_t j = 0; j < remain; ++j) {
    if (isMinFunc) {
      if (v > p[j]) {
        v = p[j];
      }
    } else {
      if (v < p[j]) {
        v = p[j];
      }
    }
  }

  return v;
}
/**
 * Find the minimum or maximum of a contiguous float array.
 *
 * When compiled with AVX the bulk of the array is processed eight items at a time;
 * the leftover items (and the whole array when AVX is not compiled in, or when fewer
 * than eight items are given) are handled by a scalar loop.
 *
 * @param pData      first element to inspect; must hold at least numOfRows items
 * @param numOfRows  number of items to inspect
 * @param isMinFunc  true to compute the minimum, false to compute the maximum
 * @return the minimum/maximum value, or 0 if pData is NULL or numOfRows <= 0
 */
static float floatVectorCmpAVX(const float* pData, int32_t numOfRows, bool isMinFunc) {
  if (pData == NULL || numOfRows <= 0) {
    return 0;
  }

  float        v = pData[0];
  const float* p = pData;          // next item that has not been inspected yet
  int32_t      remain = numOfRows;  // number of items left for the scalar loop

#if __AVX__
  const int32_t width = 8;  // number of floats held by one 256-bit register
  const int32_t rounds = numOfRows / width;

  if (rounds > 0) {
    // the first full vector seeds the accumulator, so only rounds - 1 vectors remain to be
    // merged; starting the loop at 0 would read one vector past the end of the data
    __m256 acc = _mm256_loadu_ps(p);
    p += width;

    if (isMinFunc) {
      for (int32_t i = 1; i < rounds; ++i) {
        __m256 next = _mm256_loadu_ps(p);
        acc = _mm256_min_ps(acc, next);
        p += width;
      }
    } else {
      for (int32_t i = 1; i < rounds; ++i) {
        __m256 next = _mm256_loadu_ps(p);
        acc = _mm256_max_ps(acc, next);
        p += width;
      }
    }

    // reduce the eight lanes of the accumulator to a single value
    float lanes[8];
    _mm256_storeu_ps(lanes, acc);

    v = lanes[0];
    for (int32_t k = 1; k < width; ++k) {
      if (isMinFunc) {
        v = (v < lanes[k]) ? v : lanes[k];
      } else {
        v = (v > lanes[k]) ? v : lanes[k];
      }
    }

    remain = numOfRows % width;
  }
#endif

  // p already points at the first unprocessed item, so the tail is indexed from p directly
  for (int32_t j = 0; j < remain; ++j) {
    if (isMinFunc) {
      if (v > p[j]) {
        v = p[j];
      }
    } else {
      if (v < p[j]) {
        v = p[j];
      }
    }
  }

  return v;
}
static int32_t handleInt32Col(SColumnInfoData* pCol, int32_t start, int32_t numOfRows, SqlFunctionCtx* pCtx, static int32_t handleInt32Col(SColumnInfoData* pCol, int32_t start, int32_t numOfRows, SqlFunctionCtx* pCtx,
SMinmaxResInfo* pBuf, bool isMinFunc) { SMinmaxResInfo* pBuf, bool isMinFunc) {
int32_t* pData = (int32_t*)pCol->pData; int32_t* pData = (int32_t*)pCol->pData;
int32_t* val = (int32_t*)&pBuf->v; int32_t* val = (int32_t*)&pBuf->v;
int32_t numOfElems = 0; int32_t numOfElems = 0;
if (pCol->hasNull || numOfRows < 8 || pCtx->subsidiaries.num > 0) { if (pCol->hasNull || numOfRows <= 8 || pCtx->subsidiaries.num > 0) {
if (isMinFunc) { // min if (isMinFunc) { // min
for (int32_t i = start; i < start + numOfRows; ++i) { for (int32_t i = start; i < start + numOfRows; ++i) {
if (colDataIsNull_f(pCol->nullbitmap, i)) { if (colDataIsNull_f(pCol->nullbitmap, i)) {
...@@ -77,79 +220,30 @@ static int32_t handleInt32Col(SColumnInfoData* pCol, int32_t start, int32_t numO ...@@ -77,79 +220,30 @@ static int32_t handleInt32Col(SColumnInfoData* pCol, int32_t start, int32_t numO
} }
} }
} else { // not has null value } else { // not has null value
// 1. software version // AVX2 version to speedup the loop
if (tsAVX2Enable && tsSIMDEnable) {
*val = i32VectorCmpAVX2(pData, numOfRows, isMinFunc);
} else {
if (!pBuf->assign) {
// 3. AVX2 version to speedup the loop *val = pData[0];
int32_t startElem = 0;//((uint64_t)plist) & ((1<<8u)-1); pBuf->assign = true;
int32_t i = 0;
int32_t bitWidth = 8;
int32_t v = 0;
int32_t remain = (numOfRows - startElem) % bitWidth;
int32_t rounds = (numOfRows - startElem) / bitWidth;
const int32_t* p = &pData[startElem];
__m256i next;
__m256i initialVal = _mm256_loadu_si256((__m256i*)p);
p += bitWidth;
if (!isMinFunc) { // max function
for (; i < rounds; ++i) {
next = _mm256_loadu_si256((__m256i*)p);
initialVal = _mm256_max_epi32(initialVal, next);
p += bitWidth;
} }
// let sum up the final results if (isMinFunc) { // min
const int32_t* q = (const int32_t*)&initialVal; for (int32_t i = start; i < start + numOfRows; ++i) {
if (*val > pData[i]) {
v = TMAX(q[0], q[1]); *val = pData[i];
v = TMAX(v, q[2]); }
v = TMAX(v, q[3]);
v = TMAX(v, q[4]);
v = TMAX(v, q[5]);
v = TMAX(v, q[6]);
v = TMAX(v, q[7]);
// calculate the front and the reminder items in array list
startElem += rounds * bitWidth;
for (int32_t j = 0; j < remain; ++j) {
if (v < p[j + startElem]) {
v = p[j + startElem];
} }
} } else { // max
} else { // min function for (int32_t i = start; i < start + numOfRows; ++i) {
for (; i < rounds; ++i) { if (*val < pData[i]) {
next = _mm256_loadu_si256((__m256i*)p); *val = pData[i];
initialVal = _mm256_min_epi32(initialVal, next); }
p += bitWidth;
}
// let sum up the final results
const int32_t* q = (const int32_t*)&initialVal;
v = TMIN(q[0], q[1]);
v = TMIN(v, q[2]);
v = TMIN(v, q[3]);
v = TMIN(v, q[4]);
v = TMIN(v, q[5]);
v = TMIN(v, q[6]);
v = TMIN(v, q[7]);
// calculate the front and the reminder items in array list
startElem += rounds * bitWidth;
for (int32_t j = 0; j < remain; ++j) {
if (v > p[j + startElem]) {
v = p[j + startElem];
} }
} }
} }
*val = v;
numOfElems = numOfRows; numOfElems = numOfRows;
} }
...@@ -213,79 +307,30 @@ static int32_t handleFloatCol(SColumnInfoData* pCol, int32_t start, int32_t numO ...@@ -213,79 +307,30 @@ static int32_t handleFloatCol(SColumnInfoData* pCol, int32_t start, int32_t numO
} }
} }
} else { // not has null value } else { // not has null value
// 1. software version // AVX version to speedup the loop
if (tsAVXEnable && tsSIMDEnable) {
*val = (double) floatVectorCmpAVX(pData, numOfRows, isMinFunc);
} else {
if (!pBuf->assign) {
// 3. AVX2 version to speedup the loop *val = pData[0];
int32_t startElem = 0;//((uint64_t)plist) & ((1<<8u)-1); pBuf->assign = true;
int32_t i = 0;
int32_t bitWidth = 8;
float v = 0;
int32_t remain = (numOfRows - startElem) % bitWidth;
int32_t rounds = (numOfRows - startElem) / bitWidth;
const float* p = &pData[startElem];
__m256 next;
__m256 initialVal = _mm256_loadu_ps(p);
p += bitWidth;
if (!isMinFunc) { // max function
for (; i < rounds; ++i) {
next = _mm256_loadu_ps(p);
initialVal = _mm256_max_ps(initialVal, next);
p += bitWidth;
} }
// let sum up the final results if (isMinFunc) { // min
const float* q = (const float*)&initialVal; for (int32_t i = start; i < start + numOfRows; ++i) {
if (*val > pData[i]) {
v = TMAX(q[0], q[1]); *val = pData[i];
v = TMAX(v, q[2]); }
v = TMAX(v, q[3]);
v = TMAX(v, q[4]);
v = TMAX(v, q[5]);
v = TMAX(v, q[6]);
v = TMAX(v, q[7]);
// calculate the front and the reminder items in array list
startElem += rounds * bitWidth;
for (int32_t j = 0; j < remain; ++j) {
if (v < p[j + startElem]) {
v = p[j + startElem];
} }
} } else { // max
} else { // min function for (int32_t i = start; i < start + numOfRows; ++i) {
for (; i < rounds; ++i) { if (*val < pData[i]) {
next = _mm256_loadu_ps(p); *val = pData[i];
initialVal = _mm256_min_ps(initialVal, next); }
p += bitWidth;
}
// let sum up the final results
const float* q = (const float*)&initialVal;
v = TMIN(q[0], q[1]);
v = TMIN(v, q[2]);
v = TMIN(v, q[3]);
v = TMIN(v, q[4]);
v = TMIN(v, q[5]);
v = TMIN(v, q[6]);
v = TMIN(v, q[7]);
// calculate the front and the reminder items in array list
startElem += rounds * bitWidth;
for (int32_t j = 0; j < remain; ++j) {
if (v > p[j + startElem]) {
v = p[j + startElem];
} }
} }
} }
*val = v;
numOfElems = numOfRows; numOfElems = numOfRows;
} }
......
...@@ -37,6 +37,12 @@ float tsNumOfCores = 0; ...@@ -37,6 +37,12 @@ float tsNumOfCores = 0;
int64_t tsTotalMemoryKB = 0; int64_t tsTotalMemoryKB = 0;
char *tsProcPath = NULL; char *tsProcPath = NULL;
char tsSIMDEnable = 0;
char tsSSE42Enable = 0;
char tsAVXEnable = 0;
char tsAVX2Enable = 0;
char tsFMAEnable = 0;
void osDefaultInit() { void osDefaultInit() {
taosSeedRand(taosSafeRand()); taosSeedRand(taosSafeRand());
taosGetSystemLocale(tsLocale, tsCharset); taosGetSystemLocale(tsLocale, tsCharset);
...@@ -99,7 +105,7 @@ bool osDataSpaceSufficient() { return tsDataSpace.size.avail > tsDataSpace.reser ...@@ -99,7 +105,7 @@ bool osDataSpaceSufficient() { return tsDataSpace.size.avail > tsDataSpace.reser
bool osTempSpaceSufficient() { return tsTempSpace.size.avail > tsTempSpace.reserved; } bool osTempSpaceSufficient() { return tsTempSpace.size.avail > tsTempSpace.reserved; }
void osSetTimezone(const char *timezone) { taosSetSystemTimezone(timezone, tsTimezoneStr, &tsDaylight, &tsTimezone); } void osSetTimezone(const char *tz) { taosSetSystemTimezone(tz, tsTimezoneStr, &tsDaylight, &tsTimezone); }
void osSetSystemLocale(const char *inLocale, const char *inCharSet) { void osSetSystemLocale(const char *inLocale, const char *inCharSet) {
memcpy(tsLocale, inLocale, strlen(inLocale) + 1); memcpy(tsLocale, inLocale, strlen(inLocale) + 1);
......
...@@ -775,6 +775,7 @@ int64_t taosGetLineFile(TdFilePtr pFile, char **__restrict ptrBuf) { ...@@ -775,6 +775,7 @@ int64_t taosGetLineFile(TdFilePtr pFile, char **__restrict ptrBuf) {
return getline(ptrBuf, &len, pFile->fp); return getline(ptrBuf, &len, pFile->fp);
#endif #endif
} }
int64_t taosGetsFile(TdFilePtr pFile, int32_t maxSize, char *__restrict buf) { int64_t taosGetsFile(TdFilePtr pFile, int32_t maxSize, char *__restrict buf) {
if (pFile == NULL || buf == NULL) { if (pFile == NULL || buf == NULL) {
return -1; return -1;
...@@ -785,6 +786,7 @@ int64_t taosGetsFile(TdFilePtr pFile, int32_t maxSize, char *__restrict buf) { ...@@ -785,6 +786,7 @@ int64_t taosGetsFile(TdFilePtr pFile, int32_t maxSize, char *__restrict buf) {
} }
return strlen(buf); return strlen(buf);
} }
int32_t taosEOFFile(TdFilePtr pFile) { int32_t taosEOFFile(TdFilePtr pFile) {
if (pFile == NULL) { if (pFile == NULL) {
return 0; return 0;
......
...@@ -67,6 +67,9 @@ char *taosCharsetReplace(char *charsetstr) { ...@@ -67,6 +67,9 @@ char *taosCharsetReplace(char *charsetstr) {
} }
/** /**
* TODO: here we may employ the systemctl API to set/get the correct locale on Linux. In some cases, setlocale
* does not seem to respond as expected.
*
* In some Linux systems, setLocale(LC_CTYPE, "") may return NULL, in which case the launch of * In some Linux systems, setLocale(LC_CTYPE, "") may return NULL, in which case the launch of
* both the TDengine Server and the Client may be interrupted. * both the TDengine Server and the Client may be interrupted.
* *
...@@ -148,7 +151,7 @@ void taosGetSystemLocale(char *outLocale, char *outCharset) { ...@@ -148,7 +151,7 @@ void taosGetSystemLocale(char *outLocale, char *outCharset) {
* *
* example: en_US.UTF-8, zh_CN.GB18030, zh_CN.UTF-8, * example: en_US.UTF-8, zh_CN.GB18030, zh_CN.UTF-8,
* *
* if user does not specify the locale in taos.cfg the program use default LC_CTYPE as system locale. * If user does not specify the locale in taos.cfg, the program then uses default LC_CTYPE as system locale.
* *
* In case of some CentOS systems, their default locale is "en_US.utf8", which is not valid code_page * In case of some CentOS systems, their default locale is "en_US.utf8", which is not valid code_page
* for libiconv that is employed to convert string in this system. This program will automatically use * for libiconv that is employed to convert string in this system. This program will automatically use
......
...@@ -155,8 +155,8 @@ static int32_t taosGetSysCpuInfo(SysCpuInfo *cpuInfo) { ...@@ -155,8 +155,8 @@ static int32_t taosGetSysCpuInfo(SysCpuInfo *cpuInfo) {
} }
char line[1024]; char line[1024];
ssize_t _bytes = taosGetsFile(pFile, sizeof(line), line); ssize_t bytes = taosGetsFile(pFile, sizeof(line), line);
if ((_bytes < 0) || (line == NULL)) { if (bytes < 0) {
taosCloseFile(&pFile); taosCloseFile(&pFile);
return -1; return -1;
} }
...@@ -193,9 +193,9 @@ static int32_t taosGetProcCpuInfo(ProcCpuInfo *cpuInfo) { ...@@ -193,9 +193,9 @@ static int32_t taosGetProcCpuInfo(ProcCpuInfo *cpuInfo) {
return -1; return -1;
} }
char line[1024]; char line[1024] = {0};
ssize_t _bytes = taosGetsFile(pFile, sizeof(line), line); ssize_t bytes = taosGetsFile(pFile, sizeof(line), line);
if ((_bytes < 0) || (line == NULL)) { if (bytes < 0) {
taosCloseFile(&pFile); taosCloseFile(&pFile);
return -1; return -1;
} }
...@@ -239,6 +239,7 @@ void taosGetSystemInfo() { ...@@ -239,6 +239,7 @@ void taosGetSystemInfo() {
taosGetCpuCores(&tsNumOfCores); taosGetCpuCores(&tsNumOfCores);
taosGetTotalMemory(&tsTotalMemoryKB); taosGetTotalMemory(&tsTotalMemoryKB);
taosGetCpuUsage(NULL, NULL); taosGetCpuUsage(NULL, NULL);
taosGetCpuInstructions(&tsSSE42Enable, &tsAVXEnable, &tsAVX2Enable, &tsFMAEnable);
#endif #endif
} }
...@@ -366,7 +367,7 @@ int32_t taosGetCpuInfo(char *cpuModel, int32_t maxLen, float *numOfCores) { ...@@ -366,7 +367,7 @@ int32_t taosGetCpuInfo(char *cpuModel, int32_t maxLen, float *numOfCores) {
return code; return code;
#else #else
char line[1024]; char line[1024] = {0};
size_t size = 0; size_t size = 0;
int32_t done = 0; int32_t done = 0;
int32_t code = -1; int32_t code = -1;
...@@ -468,6 +469,46 @@ void taosGetCpuUsage(double *cpu_system, double *cpu_engine) { ...@@ -468,6 +469,46 @@ void taosGetCpuUsage(double *cpu_system, double *cpu_engine) {
} }
} }
#define __cpuid_fix(level, a, b, c, d) \
__asm__("xor %%ecx, %%ecx\n" \
"cpuid\n" \
: "=a"(a), "=b"(b), "=c"(c), "=d"(d) \
: "0"(level))
// todo add for windows and mac
/**
 * Detect which SIMD related instruction sets the current CPU reports via CPUID.
 *
 * Besides filling the four output flags, this function sets the global tsSIMDEnable
 * according to whether this translation unit was compiled with AVX/AVX2 enabled.
 *
 * Detection is only implemented for the non-Windows, non-Darwin build; on the other
 * platforms the outputs are left untouched and 0 is returned.
 *
 * @param sse42  set to 1 if SSE4.2 is reported, otherwise 0
 * @param avx    set to 1 if AVX is reported, otherwise 0
 * @param avx2   set to 1 if AVX2 is reported, otherwise 0
 * @param fma    set to 1 if FMA is reported, otherwise 0
 * @return 0 on success (or when detection is not implemented), -1 if CPUID leaf 1 is unavailable
 */
int32_t taosGetCpuInstructions(char* sse42, char* avx, char* avx2, char* fma) {
#ifdef WINDOWS
  // todo: not implemented on Windows yet. Return explicitly: falling off the end of a
  // non-void function is undefined behavior and rejected by -Werror=return-type.
  return 0;
#elif defined(_TD_DARWIN_64)
  // todo: not implemented on macOS yet, see the note in the Windows branch.
  return 0;
#else

  // If the compiler was not allowed to emit AVX/AVX2 instructions, the SIMD code paths are
  // compiled out, so the global switch must stay off regardless of what the CPU supports.
#if __AVX__ || __AVX2__
  tsSIMDEnable = true;
#else
  tsSIMDEnable = false;
#endif

  uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

  int32_t ret = __get_cpuid(1, &eax, &ebx, &ecx, &edx);
  if (ret == 0) {
    return -1;  // failed to get the cpuid info
  }

  // NOTE(review): these bits only tell that the CPU implements the instructions; whether the
  // OS saves the YMM state (OSXSAVE/XGETBV) is not verified here -- confirm if that matters.
  *sse42 = (char)((ecx & bit_SSE4_2) == bit_SSE4_2);
  *avx = (char)((ecx & bit_AVX) == bit_AVX);
  *fma = (char)((ecx & bit_FMA) == bit_FMA);

  // AVX2 is reported by leaf 7; make sure the CPU implements that leaf before querying it
  if (__get_cpuid_max(0, NULL) < 7) {
    *avx2 = 0;
    return 0;
  }

  // work around a bug in GCC.
  // Ref to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77756
  __cpuid_fix(7u, eax, ebx, ecx, edx);
  *avx2 = (char)((ebx & bit_AVX2) == bit_AVX2);

  return 0;
#endif
}
int32_t taosGetTotalMemory(int64_t *totalKB) { int32_t taosGetTotalMemory(int64_t *totalKB) {
#ifdef WINDOWS #ifdef WINDOWS
MEMORYSTATUSEX memsStat; MEMORYSTATUSEX memsStat;
...@@ -511,11 +552,11 @@ int32_t taosGetProcMemory(int64_t *usedKB) { ...@@ -511,11 +552,11 @@ int32_t taosGetProcMemory(int64_t *usedKB) {
return -1; return -1;
} }
ssize_t _bytes = 0; ssize_t bytes = 0;
char line[1024]; char line[1024] = {0};
while (!taosEOFFile(pFile)) { while (!taosEOFFile(pFile)) {
_bytes = taosGetsFile(pFile, sizeof(line), line); bytes = taosGetsFile(pFile, sizeof(line), line);
if ((_bytes < 0) || (line == NULL)) { if (bytes < 0) {
break; break;
} }
if (strstr(line, "VmRSS:") != NULL) { if (strstr(line, "VmRSS:") != NULL) {
...@@ -523,7 +564,7 @@ int32_t taosGetProcMemory(int64_t *usedKB) { ...@@ -523,7 +564,7 @@ int32_t taosGetProcMemory(int64_t *usedKB) {
} }
} }
if (line == NULL) { if (strlen(line) < 0) {
// printf("read file:%s failed", tsProcMemFile); // printf("read file:%s failed", tsProcMemFile);
taosCloseFile(&pFile); taosCloseFile(&pFile);
return -1; return -1;
...@@ -624,14 +665,14 @@ int32_t taosGetProcIO(int64_t *rchars, int64_t *wchars, int64_t *read_bytes, int ...@@ -624,14 +665,14 @@ int32_t taosGetProcIO(int64_t *rchars, int64_t *wchars, int64_t *read_bytes, int
TdFilePtr pFile = taosOpenFile(tsProcIOFile, TD_FILE_READ | TD_FILE_STREAM); TdFilePtr pFile = taosOpenFile(tsProcIOFile, TD_FILE_READ | TD_FILE_STREAM);
if (pFile == NULL) return -1; if (pFile == NULL) return -1;
ssize_t _bytes = 0; ssize_t bytes = 0;
char line[1024]; char line[1024] = {0};
char tmp[24]; char tmp[24];
int readIndex = 0; int readIndex = 0;
while (!taosEOFFile(pFile)) { while (!taosEOFFile(pFile)) {
_bytes = taosGetsFile(pFile, sizeof(line), line); bytes = taosGetsFile(pFile, sizeof(line), line);
if (_bytes < 10 || line == NULL) { if (bytes < 10) {
break; break;
} }
if (strstr(line, "rchar:") != NULL) { if (strstr(line, "rchar:") != NULL) {
......
...@@ -339,7 +339,7 @@ char *taosStrpTime(const char *buf, const char *fmt, struct tm *tm) { ...@@ -339,7 +339,7 @@ char *taosStrpTime(const char *buf, const char *fmt, struct tm *tm) {
#endif #endif
} }
FORCE_INLINE int32_t taosGetTimeOfDay(struct timeval *tv) { int32_t taosGetTimeOfDay(struct timeval *tv) {
#ifdef WINDOWS #ifdef WINDOWS
time_t t; time_t t;
t = taosGetTimestampSec(); t = taosGetTimestampSec();
...@@ -455,6 +455,7 @@ static int isLeapYear(time_t year) { ...@@ -455,6 +455,7 @@ static int isLeapYear(time_t year) {
else else
return 1; return 1;
} }
struct tm *taosLocalTimeNolock(struct tm *result, const time_t *timep, int dst) { struct tm *taosLocalTimeNolock(struct tm *result, const time_t *timep, int dst) {
if (result == NULL) { if (result == NULL) {
return localtime(timep); return localtime(timep);
...@@ -542,7 +543,9 @@ struct tm *taosLocalTimeNolock(struct tm *result, const time_t *timep, int dst) ...@@ -542,7 +543,9 @@ struct tm *taosLocalTimeNolock(struct tm *result, const time_t *timep, int dst)
#endif #endif
return result; return result;
} }
int32_t taosGetTimestampSec() { return (int32_t)time(NULL); } int32_t taosGetTimestampSec() { return (int32_t)time(NULL); }
int32_t taosClockGetTime(int clock_id, struct timespec *pTS) { int32_t taosClockGetTime(int clock_id, struct timespec *pTS) {
#ifdef WINDOWS #ifdef WINDOWS
LARGE_INTEGER t; LARGE_INTEGER t;
......
...@@ -561,13 +561,13 @@ void cfgDumpCfg(SConfig *pCfg, bool tsc, bool dump) { ...@@ -561,13 +561,13 @@ void cfgDumpCfg(SConfig *pCfg, bool tsc, bool dump) {
if (dump && strcmp(pItem->name, "scriptDir") == 0) continue; if (dump && strcmp(pItem->name, "scriptDir") == 0) continue;
if (dump && strcmp(pItem->name, "simDebugFlag") == 0) continue; if (dump && strcmp(pItem->name, "simDebugFlag") == 0) continue;
tstrncpy(src, cfgStypeStr(pItem->stype), CFG_SRC_PRINT_LEN); tstrncpy(src, cfgStypeStr(pItem->stype), CFG_SRC_PRINT_LEN);
for (int32_t i = 0; i < CFG_SRC_PRINT_LEN; ++i) { for (int32_t j = 0; j < CFG_SRC_PRINT_LEN; ++j) {
if (src[i] == 0) src[i] = ' '; if (src[j] == 0) src[j] = ' ';
} }
tstrncpy(name, pItem->name, CFG_NAME_PRINT_LEN); tstrncpy(name, pItem->name, CFG_NAME_PRINT_LEN);
for (int32_t i = 0; i < CFG_NAME_PRINT_LEN; ++i) { for (int32_t j = 0; j < CFG_NAME_PRINT_LEN; ++j) {
if (name[i] == 0) name[i] = ' '; if (name[j] == 0) name[j] = ' ';
} }
switch (pItem->dtype) { switch (pItem->dtype) {
......
...@@ -24,7 +24,6 @@ ...@@ -24,7 +24,6 @@
#endif #endif
#include "tcrc32c.h" #include "tcrc32c.h"
#include "tdef.h"
#define POLY 0x82f63b78 #define POLY 0x82f63b78
#define LONG_SHIFT 8192 #define LONG_SHIFT 8192
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册