From 693ee662af583f9a0ae8ac02f56faad702f12e86 Mon Sep 17 00:00:00 2001 From: localvar Date: Sat, 24 Aug 2019 14:04:56 +0800 Subject: [PATCH] fix several issues in string token parsing 1. the shell should not remove escape sequences \' and \" in a string. 2. `tsParseTime` should not unescape the next string token (this issue appears after the first issue was fixed). 3. `value[4] != '-'` in `tsParseTime` crashes in rare cases if `value[4]` is in unallocated virtual memory. 4. `operator[x]` and `delimiter[x]` may result in unexpected behavior as the string is UTF-8 encoded and `x < 0` could be true. 5. changes the behavior of `tscGetToken` a little: now, unescaped single quotation is allowed in double quoted strings and unescaped double quotation is allowed in single quoted strings. 6. minor performance improvements and other improvements. --- src/client/src/tscParseInsert.c | 11 ++--- src/inc/tstoken.h | 2 +- src/kit/shell/src/shellEngine.c | 13 ++++++ src/util/src/tstoken.c | 77 ++++++++++++++------------------- 4 files changed, 49 insertions(+), 54 deletions(-) diff --git a/src/client/src/tscParseInsert.c b/src/client/src/tscParseInsert.c index 7dec7280f8..e70a2c00f3 100644 --- a/src/client/src/tscParseInsert.c +++ b/src/client/src/tscParseInsert.c @@ -94,16 +94,12 @@ int tsParseTime(char *value, int32_t valuelen, int64_t *time, char **next, char int64_t useconds = 0; char *pTokenEnd = *next; - tscGetToken(pTokenEnd, &token, &tokenlen); - if (tokenlen == 0 && strlen(value) == 0) { - INVALID_SQL_RET_MSG(error, "missing time stamp"); - } - if (strncmp(value, "now", 3) == 0 && valuelen == 3) { + if (valuelen == 3 && (strncmp(value, "now", 3) == 0)) { useconds = taosGetTimestamp(timePrec); - } else if (strncmp(value, "0", 1) == 0 && valuelen == 1) { + } else if (valuelen == 1 && value[0] == '0') { // do nothing - } else if (value[4] != '-') { + } else if (valuelen <= 4 || value[4] != '-') { for (int32_t i = 0; i < valuelen; ++i) { /* * filter illegal input. 
@@ -126,7 +122,6 @@ int tsParseTime(char *value, int32_t valuelen, int64_t *time, char **next, char for (int k = valuelen; value[k] != '\0'; k++) { if (value[k] == ' ' || value[k] == '\t') continue; if (value[k] == ',') { - *next = pTokenEnd; *time = useconds; return 0; } diff --git a/src/inc/tstoken.h b/src/inc/tstoken.h index 5305d2c8fa..2e508ab06d 100644 --- a/src/inc/tstoken.h +++ b/src/inc/tstoken.h @@ -30,7 +30,7 @@ typedef struct SSQLToken { } SSQLToken; char *tscGetToken(char *string, char **token, int *tokenLen); -char *tscGetTokenDelimiter(char *string, char **token, int *tokenLen, char *delimiters); +char *tscGetTokenDelimiter(char *string, char **token, int *tokenLen, const char *delimiters); /** * tokenizer for sql string diff --git a/src/kit/shell/src/shellEngine.c b/src/kit/shell/src/shellEngine.c index ad7ee0af6e..0b276f2f87 100644 --- a/src/kit/shell/src/shellEngine.c +++ b/src/kit/shell/src/shellEngine.c @@ -111,6 +111,7 @@ TAOS *shellInit(struct arguments *args) { void shellReplaceCtrlChar(char *str) { _Bool ctrlOn = false; char *pstr = NULL; + char quote = 0; for (pstr = str; *str != '\0'; ++str) { if (ctrlOn) { @@ -131,6 +132,13 @@ void shellReplaceCtrlChar(char *str) { *pstr = '\\'; pstr++; break; + case '\'': + case '"': + if (quote) { + *pstr++ = '\\'; + *pstr++ = *str; + } + break; default: break; } @@ -139,6 +147,11 @@ void shellReplaceCtrlChar(char *str) { if (*str == '\\') { ctrlOn = true; } else { + if (quote == *str) { + quote = 0; + } else if (*str == '\'' || *str == '"') { + quote = *str; + } *pstr = *str; pstr++; } diff --git a/src/util/src/tstoken.c b/src/util/src/tstoken.c index a6265e18cc..a29a06a9be 100644 --- a/src/util/src/tstoken.c +++ b/src/util/src/tstoken.c @@ -23,77 +23,68 @@ #include "shash.h" #include "tstoken.h" -static char operator[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +static const char operator[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, '$', '%', '&', 0, '(', ')', '*', '+', 0, '-', 0, '/', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '<', '=', '>', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '[', 0, ']', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '|', 0, 0, 0}; -static char delimiter[] = { +static const char delimiter[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ',', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ';', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; -bool isCharInDelimiter(char c, char *delimiter) { - for (int i = 0; i < strlen(delimiter); i++) { - if (delimiter[i] == c) return true; - } - return false; -} - -char *tscGetTokenDelimiter(char *string, char **token, int *tokenLen, char *delimiters) { - while (*string != 0) { - if (isCharInDelimiter(*string, delimiters)) { - ++string; - } else { - break; - } +char *tscGetTokenDelimiter(char *string, char **token, int *tokenLen, const char *delimiters) { + while ((*string != 0) && strchr(delimiters, *string)) { + ++string; } *token = string; char *str = string; - *tokenLen = 0; - while (*str != 0) { - if (!isCharInDelimiter(*str, delimiters)) { - *tokenLen = *tokenLen + 1; - str++; - } else { - break; - } + while ((*str != 0) && (strchr(delimiters, *str) == NULL)) { + ++str; } + *tokenLen = str - string; + return string; } +static bool isOperator(char c) { + return (c < 0) ? false : (operator[c] != 0); +} + +static bool isDelimiter(char c) { + return (c < 0) ? 
false : (delimiter[c] != 0); +} + char *tscGetToken(char *string, char **token, int *tokenLen) { char quote = 0; while (*string != 0) { - if (delimiter[*string]) { + if (isDelimiter(*string)) { ++string; } else { break; } } - char quotaChar = 0; if (*string == '\'' || *string == '\"') { - quote = 1; - quotaChar = *string; + quote = *string; string++; } *token = string; /* not in string, return token */ - if (*string > 0 && operator[*string] && quote == 0) { + if (quote == 0 && isOperator(*string)) { string++; /* handle the case: insert into tabx using stable1 tags(-1)/tags(+1) * values(....) */ - if (operator[*string] &&(*string != '(' && *string != ')' && *string != '-' && *string != '+')) + if (isOperator(*string) &&(*string != '(' && *string != ')' && *string != '-' && *string != '+')) *tokenLen = 2; else *tokenLen = 1; @@ -102,28 +93,24 @@ char *tscGetToken(char *string, char **token, int *tokenLen) { while (*string != 0) { if (quote) { - // handle escape situation: '\"', the " should not be eliminated - if (*string == quotaChar) { - if (*(string - 1) != '\\') { - break; - } else { + if (*string == '\'' || *string == '"') { + // handle escape situation, " and ' should not be eliminated + if (*(string - 1) == '\\') { shiftStr(string - 1, string); + continue; + } else if (*string == quote) { + break; } - } else { - ++string; } - } else { - if (delimiter[*string]) break; - - if (operator[*string]) break; - - ++string; + } else if (isDelimiter(*string) || isOperator(*string)) { + break; } + ++string; } *tokenLen = (int)(string - *token); - if (quotaChar != 0 && *string != 0 && *(string + 1) != 0) { + if (quote && *string != 0) { return string + 1; } else { return string; @@ -135,7 +122,7 @@ void shiftStr(char *dst, char *src) { do { dst[i] = src[i]; i++; - } while (delimiter[src[i]] == 0); + } while (!isDelimiter(src[i])); src[i - 1] = ' '; } -- GitLab