diff --git a/include/util/tutil.h b/include/util/tutil.h index de963001557519b7dd9355187122535c77b3f17a..9f36cdba7c4d12a6c71b93a53633803ee73bd416 100644 --- a/include/util/tutil.h +++ b/include/util/tutil.h @@ -34,6 +34,8 @@ char *strtolower(char *dst, const char *src); char *strntolower(char *dst, const char *src, int32_t n); char *strntolower_s(char *dst, const char *src, int32_t n); int64_t strnatoi(char *num, int32_t len); +size_t tstrncspn(const char *str, size_t ssize, const char *reject, size_t rsize); + char *strbetween(char *string, char *begin, char *end); char *paGetToken(char *src, char **token, int32_t *tokenLen); diff --git a/source/util/src/tcompare.c b/source/util/src/tcompare.c index 21e7d9e0cd07703c490f97f01c7cd43dd48254ff..54b6d6d2656168673eba7f19949ee18a020af580 100644 --- a/source/util/src/tcompare.c +++ b/source/util/src/tcompare.c @@ -17,6 +17,7 @@ #define _XOPEN_SOURCE #define _DEFAULT_SOURCE #include "tcompare.h" +#include "tutil.h" #include "regex.h" #include "tdef.h" #include "thash.h" @@ -1014,16 +1015,12 @@ int32_t patternMatch(const char *pattern, size_t psize, const char *str, size_t int32_t j = 0; int32_t nMatchChar = 0; - while ((c = pattern[i++]) != 0 && (i <= psize)) { + while ((i < psize) && ((c = pattern[i++]) != 0)) { if (c == pInfo->matchAll) { /* Match "*" */ - while ((c = pattern[i++]) == pInfo->matchAll || c == pInfo->matchOne) { - if (i > psize) { // overflow check - break; - } - + while ((i < psize) && ((c = pattern[i++]) == pInfo->matchAll || c == pInfo->matchOne)) { if (c == pInfo->matchOne) { - if (j > ssize || str[j++] == 0) { // empty string, return not match + if (j >= ssize || str[j++] == 0) { // empty string, return not match return TSDB_PATTERN_NOWILDCARDMATCH; } else { ++nMatchChar; @@ -1031,21 +1028,21 @@ int32_t patternMatch(const char *pattern, size_t psize, const char *str, size_t } } - if (c == 0 || i > psize) { + if (i >= psize && (c == pInfo->umatchOne || c == pInfo->umatchAll)) { return 
TSDB_PATTERN_MATCH; /* "*" at the end of the pattern matches */ } - char acceptArray[3] = {toupper(c), tolower(c), 0}; + char rejectList[2] = {toupper(c), tolower(c)}; str += nMatchChar; int32_t remain = ssize - nMatchChar; while (1) { - size_t n = strcspn(str, acceptArray); + size_t n = tstrncspn(str, remain, rejectList, 2); str += n; remain -= n; - if (str[0] == 0 || (remain <= 0)) { + if ((remain <= 0) || str[0] == 0) { break; } @@ -1075,7 +1072,7 @@ int32_t patternMatch(const char *pattern, size_t psize, const char *str, size_t return TSDB_PATTERN_NOMATCH; } - return (str[j] == 0 || j >= ssize) ? TSDB_PATTERN_MATCH : TSDB_PATTERN_NOMATCH; + return (j >= ssize || str[j] == 0) ? TSDB_PATTERN_MATCH : TSDB_PATTERN_NOMATCH; } int32_t wcsPatternMatch(const TdUcs4 *pattern, size_t psize, const TdUcs4 *str, size_t ssize, const SPatternCompareInfo *pInfo) { @@ -1085,14 +1082,10 @@ int32_t wcsPatternMatch(const TdUcs4 *pattern, size_t psize, const TdUcs4 *str, int32_t j = 0; int32_t nMatchChar = 0; - while ((c = pattern[i++]) != 0 && (i <= psize)) { - /* Match "%" */ - if (c == pInfo->umatchAll) { - while ((c = pattern[i++]) == pInfo->umatchAll || c == pInfo->umatchOne) { - if (i > psize) { - break; - } + while ((i < psize) && ((c = pattern[i++]) != 0)) { + if (c == pInfo->umatchAll) { /* Match "%" */ + while ((i < psize) && ((c = pattern[i++]) == pInfo->umatchAll || c == pInfo->umatchOne)) { if (c == pInfo->umatchOne) { if (j >= ssize || str[j++] == 0) { return TSDB_PATTERN_NOWILDCARDMATCH; @@ -1102,7 +1095,7 @@ int32_t wcsPatternMatch(const TdUcs4 *pattern, size_t psize, const TdUcs4 *str, } } - if (c == 0 || i > psize) { + if (i >= psize && (c == pInfo->umatchOne || c == pInfo->umatchAll)) { return TSDB_PATTERN_MATCH; } @@ -1116,11 +1109,11 @@ int32_t wcsPatternMatch(const TdUcs4 *pattern, size_t psize, const TdUcs4 *str, str += n; remain -= n; - if (str[0] == 0 || (remain <= 0)) { + if ((remain <= 0) || str[0] == 0) { break; } - int32_t ret = 
/**
 * Length-bounded counterpart of strcspn().
 *
 * Examines the first `size` bytes of `str` and returns how many leading bytes
 * are NOT among the first `rsize` bytes of `reject`. Both buffers are handled
 * as raw byte ranges: neither has to be NUL-terminated, and a NUL byte inside
 * `str` stops the scan only if NUL is itself listed in `reject`.
 *
 * @param str    buffer to scan
 * @param size   number of bytes of `str` to examine
 * @param reject set of bytes that terminate the scan
 * @param rsize  number of bytes of `reject` that belong to the set
 * @return index of the first byte of `str` that occurs in `reject`, or `size`
 *         when no such byte exists (including when `rsize` is 0)
 */
size_t tstrncspn(const char *str, size_t size, const char *reject, size_t rsize) {
  /* An empty reject set rejects nothing; do not touch reject[0] in that case. */
  if (size == 0 || rsize == 0) {
    return size;
  }

  /* A single reject byte is a plain bounded byte search. */
  if (rsize == 1) {
    const char *hit = memchr(str, (unsigned char)reject[0], size);
    return (hit == NULL) ? size : (size_t)(hit - str);
  }

  /* Membership table: table[b] is 1 when byte value b is in the reject set. */
  unsigned char        table[256] = {0};
  const unsigned char *rej = (const unsigned char *)reject;
  for (size_t k = 0; k < rsize; ++k) {
    table[rej[k]] = 1;
  }

  const unsigned char *s = (const unsigned char *)str;
  size_t               pos = 0;

  /* Probe four bytes per iteration; `size - pos >= 4` cannot wrap because pos <= size. */
  for (; size - pos >= 4; pos += 4) {
    unsigned int c0 = table[s[pos]];
    unsigned int c1 = table[s[pos + 1]];
    unsigned int c2 = table[s[pos + 2]];
    unsigned int c3 = table[s[pos + 3]];

    if ((c0 | c1 | c2 | c3) != 0) {
      /* Report the earliest hit inside this group, as an offset from str. */
      if (c0 != 0) {
        return pos;
      }
      if (c1 != 0) {
        return pos + 1;
      }
      if (c2 != 0) {
        return pos + 2;
      }
      return pos + 3;
    }
  }

  /* Remaining 0..3 tail bytes. */
  for (; pos < size; ++pos) {
    if (table[s[pos]] != 0) {
      return pos;
    }
  }

  return size;
}
strlen(p2), reject2, 2); + ASSERT_EQ(v, 3); + + const char* reject3 = "12345n"; + v = tstrncspn(p2, strlen(p2), reject3, 6); + ASSERT_EQ(v, 13); + + const char* reject4 = ""; + v = tstrncspn(p2, strlen(p2), reject4, 0); + ASSERT_EQ(v, 14); + + const char* reject5 = "911"; + v = tstrncspn(p2, strlen(p2), reject5, 0); + ASSERT_EQ(v, 14); } \ No newline at end of file