提交 693ee662 编写于 作者: weixin_48148422's avatar weixin_48148422

fix several issues in string token parsing

1. the shell should not remove the escape sequences \' and \" in a string.
2. `tsParseTime` should not unescape the next string token (this issue
appears after the first issue was fixed).
3. `value[4] != '-'` in `tsParseTime` crashes in rare cases if `value[4]`
is in unallocated virtual memory.
4. `operator[x]` and `delimiter[x]` may result in unexpected behavior,
as the string is UTF-8 encoded and `x < 0` could be true.
5. changes the behavior of `tscGetToken` a little: now, unescaped single
quotation marks are allowed in double-quoted strings and unescaped double
quotation marks are allowed in single-quoted strings.
6. minor performance improvements and other improvements.
上级 90e5690d
...@@ -94,16 +94,12 @@ int tsParseTime(char *value, int32_t valuelen, int64_t *time, char **next, char ...@@ -94,16 +94,12 @@ int tsParseTime(char *value, int32_t valuelen, int64_t *time, char **next, char
int64_t useconds = 0; int64_t useconds = 0;
char *pTokenEnd = *next; char *pTokenEnd = *next;
tscGetToken(pTokenEnd, &token, &tokenlen);
if (tokenlen == 0 && strlen(value) == 0) {
INVALID_SQL_RET_MSG(error, "missing time stamp");
}
if (strncmp(value, "now", 3) == 0 && valuelen == 3) { if (valuelen == 3 && (strncmp(value, "now", 3) == 0)) {
useconds = taosGetTimestamp(timePrec); useconds = taosGetTimestamp(timePrec);
} else if (strncmp(value, "0", 1) == 0 && valuelen == 1) { } else if (valuelen == 1 && value[0] == '0') {
// do nothing // do nothing
} else if (value[4] != '-') { } else if (valuelen <= 4 || value[4] != '-') {
for (int32_t i = 0; i < valuelen; ++i) { for (int32_t i = 0; i < valuelen; ++i) {
/* /*
* filter illegal input. * filter illegal input.
...@@ -126,7 +122,6 @@ int tsParseTime(char *value, int32_t valuelen, int64_t *time, char **next, char ...@@ -126,7 +122,6 @@ int tsParseTime(char *value, int32_t valuelen, int64_t *time, char **next, char
for (int k = valuelen; value[k] != '\0'; k++) { for (int k = valuelen; value[k] != '\0'; k++) {
if (value[k] == ' ' || value[k] == '\t') continue; if (value[k] == ' ' || value[k] == '\t') continue;
if (value[k] == ',') { if (value[k] == ',') {
*next = pTokenEnd;
*time = useconds; *time = useconds;
return 0; return 0;
} }
......
...@@ -30,7 +30,7 @@ typedef struct SSQLToken { ...@@ -30,7 +30,7 @@ typedef struct SSQLToken {
} SSQLToken; } SSQLToken;
char *tscGetToken(char *string, char **token, int *tokenLen); char *tscGetToken(char *string, char **token, int *tokenLen);
char *tscGetTokenDelimiter(char *string, char **token, int *tokenLen, char *delimiters); char *tscGetTokenDelimiter(char *string, char **token, int *tokenLen, const char *delimiters);
/** /**
* tokenizer for sql string * tokenizer for sql string
......
...@@ -111,6 +111,7 @@ TAOS *shellInit(struct arguments *args) { ...@@ -111,6 +111,7 @@ TAOS *shellInit(struct arguments *args) {
void shellReplaceCtrlChar(char *str) { void shellReplaceCtrlChar(char *str) {
_Bool ctrlOn = false; _Bool ctrlOn = false;
char *pstr = NULL; char *pstr = NULL;
char quote = 0;
for (pstr = str; *str != '\0'; ++str) { for (pstr = str; *str != '\0'; ++str) {
if (ctrlOn) { if (ctrlOn) {
...@@ -131,6 +132,13 @@ void shellReplaceCtrlChar(char *str) { ...@@ -131,6 +132,13 @@ void shellReplaceCtrlChar(char *str) {
*pstr = '\\'; *pstr = '\\';
pstr++; pstr++;
break; break;
case '\'':
case '"':
if (quote) {
*pstr++ = '\\';
*pstr++ = *str;
}
break;
default: default:
break; break;
} }
...@@ -139,6 +147,11 @@ void shellReplaceCtrlChar(char *str) { ...@@ -139,6 +147,11 @@ void shellReplaceCtrlChar(char *str) {
if (*str == '\\') { if (*str == '\\') {
ctrlOn = true; ctrlOn = true;
} else { } else {
if (quote == *str) {
quote = 0;
} else if (*str == '\'' || *str == '"') {
quote = *str;
}
*pstr = *str; *pstr = *str;
pstr++; pstr++;
} }
......
...@@ -23,77 +23,68 @@ ...@@ -23,77 +23,68 @@
#include "shash.h" #include "shash.h"
#include "tstoken.h" #include "tstoken.h"
static char operator[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, static const char operator[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '$', '%', '&', 0, '(', ')', '*', '+', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '$', '%', '&', 0, '(', ')', '*', '+',
0, '-', 0, '/', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '<', '=', '>', 0, 0, 0, 0, '-', 0, '/', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '<', '=', '>', 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, '[', 0, ']', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '[', 0, ']', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '|', 0, 0, 0}; 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '|', 0, 0, 0};
static char delimiter[] = { static const char delimiter[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ',', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ';', 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ',', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ';', 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
}; };
bool isCharInDelimiter(char c, char *delimiter) { char *tscGetTokenDelimiter(char *string, char **token, int *tokenLen, const char *delimiters) {
for (int i = 0; i < strlen(delimiter); i++) { while ((*string != 0) && strchr(delimiters, *string)) {
if (delimiter[i] == c) return true; ++string;
}
return false;
}
char *tscGetTokenDelimiter(char *string, char **token, int *tokenLen, char *delimiters) {
while (*string != 0) {
if (isCharInDelimiter(*string, delimiters)) {
++string;
} else {
break;
}
} }
*token = string; *token = string;
char *str = string; char *str = string;
*tokenLen = 0; while ((*str != 0) && (strchr(delimiters, *str) == NULL)) {
while (*str != 0) { ++str;
if (!isCharInDelimiter(*str, delimiters)) {
*tokenLen = *tokenLen + 1;
str++;
} else {
break;
}
} }
*tokenLen = str - string;
return string; return string;
} }
/*
 * Report whether `c` is an operator character, i.e. whether its entry in
 * the `operator` lookup table is non-zero.
 *
 * The byte is converted to `unsigned char` and range-checked against the
 * table size before indexing. The signedness of plain `char` is
 * implementation-defined: where it is signed, bytes >= 0x80 (e.g. UTF-8
 * continuation bytes) are negative; where it is unsigned, they are
 * 128..255. Both cases would index outside the table, so both are
 * rejected here and treated as "not an operator".
 */
static bool isOperator(char c) {
  unsigned char idx = (unsigned char)c;
  return (idx < sizeof(operator)) && (operator[idx] != 0);
}
/*
 * Report whether `c` is a delimiter character, i.e. whether its entry in
 * the `delimiter` lookup table is non-zero.
 *
 * The byte is converted to `unsigned char` and range-checked against the
 * table size before indexing. The signedness of plain `char` is
 * implementation-defined: where it is signed, bytes >= 0x80 (e.g. UTF-8
 * continuation bytes) are negative; where it is unsigned, they are
 * 128..255. Both cases would index outside the table, so both are
 * rejected here and treated as "not a delimiter".
 */
static bool isDelimiter(char c) {
  unsigned char idx = (unsigned char)c;
  return (idx < sizeof(delimiter)) && (delimiter[idx] != 0);
}
char *tscGetToken(char *string, char **token, int *tokenLen) { char *tscGetToken(char *string, char **token, int *tokenLen) {
char quote = 0; char quote = 0;
while (*string != 0) { while (*string != 0) {
if (delimiter[*string]) { if (isDelimiter(*string)) {
++string; ++string;
} else { } else {
break; break;
} }
} }
char quotaChar = 0;
if (*string == '\'' || *string == '\"') { if (*string == '\'' || *string == '\"') {
quote = 1; quote = *string;
quotaChar = *string;
string++; string++;
} }
*token = string; *token = string;
/* not in string, return token */ /* not in string, return token */
if (*string > 0 && operator[*string] && quote == 0) { if (quote == 0 && isOperator(*string)) {
string++; string++;
/* handle the case: insert into tabx using stable1 tags(-1)/tags(+1) /* handle the case: insert into tabx using stable1 tags(-1)/tags(+1)
* values(....) */ * values(....) */
if (operator[*string] &&(*string != '(' && *string != ')' && *string != '-' && *string != '+')) if (isOperator(*string) &&(*string != '(' && *string != ')' && *string != '-' && *string != '+'))
*tokenLen = 2; *tokenLen = 2;
else else
*tokenLen = 1; *tokenLen = 1;
...@@ -102,28 +93,24 @@ char *tscGetToken(char *string, char **token, int *tokenLen) { ...@@ -102,28 +93,24 @@ char *tscGetToken(char *string, char **token, int *tokenLen) {
while (*string != 0) { while (*string != 0) {
if (quote) { if (quote) {
// handle escape situation: '\"', the " should not be eliminated if (*string == '\'' || *string == '"') {
if (*string == quotaChar) { // handle escape situation, " and ' should not be eliminated
if (*(string - 1) != '\\') { if (*(string - 1) == '\\') {
break;
} else {
shiftStr(string - 1, string); shiftStr(string - 1, string);
continue;
} else if (*string == quote) {
break;
} }
} else {
++string;
} }
} else { } else if (isDelimiter(*string) || isOperator(*string)) {
if (delimiter[*string]) break; break;
if (operator[*string]) break;
++string;
} }
++string;
} }
*tokenLen = (int)(string - *token); *tokenLen = (int)(string - *token);
if (quotaChar != 0 && *string != 0 && *(string + 1) != 0) { if (quote && *string != 0) {
return string + 1; return string + 1;
} else { } else {
return string; return string;
...@@ -135,7 +122,7 @@ void shiftStr(char *dst, char *src) { ...@@ -135,7 +122,7 @@ void shiftStr(char *dst, char *src) {
do { do {
dst[i] = src[i]; dst[i] = src[i];
i++; i++;
} while (delimiter[src[i]] == 0); } while (!isDelimiter(src[i]));
src[i - 1] = ' '; src[i - 1] = ' ';
} }
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册