parTokenizer.c 21.6 KB
Newer Older
H
hzcheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

H
hjxilinx 已提交
16
#include "os.h"
X
Xiaoyu Wang 已提交
17
#include "parToken.h"
H
hjxilinx 已提交
18
#include "taosdef.h"
X
Xiaoyu Wang 已提交
19
#include "thash.h"
20
#include "ttokendef.h"
H
hzcheng 已提交
21

S
slguan 已提交
22
// Every SQL keyword is described by one SKeyword entry; the entries are
// indexed through a hash table for lookup.
typedef struct SKeyword {
  const char* name;  // keyword text as written in keywordTable (upper case)
  uint16_t    type;  // token id (a TK_* constant) reported for this keyword
  uint8_t     len;   // strlen(name); filled in by doInitKeywordsTable()
} SKeyword;

X
Xiaoyu Wang 已提交
29
// clang-format off
S
slguan 已提交
30
// keywords in sql string
H
hzcheng 已提交
31
static SKeyword keywordTable[] = {
X
Xiaoyu Wang 已提交
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
    {"ACCOUNT",              TK_ACCOUNT},
    {"ACCOUNTS",             TK_ACCOUNTS},
    {"ADD",                  TK_ADD},
    {"AGGREGATE",            TK_AGGREGATE},
    {"ALL",                  TK_ALL},
    {"ALTER",                TK_ALTER},
    {"ANALYZE",              TK_ANALYZE},
    {"AND",                  TK_AND},
    {"APPS",                 TK_APPS},
    {"AS",                   TK_AS},
    {"ASC",                  TK_ASC},
    {"AT_ONCE",              TK_AT_ONCE},
    {"BALANCE",              TK_BALANCE},
    {"BETWEEN",              TK_BETWEEN},
    {"BIGINT",               TK_BIGINT},
X
Xiaoyu Wang 已提交
47
    {"BINARY",               TK_BINARY},
X
Xiaoyu Wang 已提交
48 49 50 51 52 53 54 55 56
    {"BNODE",                TK_BNODE},
    {"BNODES",               TK_BNODES},
    {"BOOL",                 TK_BOOL},
    {"BUFFER",               TK_BUFFER},
    {"BUFSIZE",              TK_BUFSIZE},
    {"BY",                   TK_BY},
    {"CACHE",                TK_CACHE},
    {"CACHEMODEL",           TK_CACHEMODEL},
    {"CACHESIZE",            TK_CACHESIZE},
X
Xiaoyu Wang 已提交
57
    {"CASE",                 TK_CASE},
X
Xiaoyu Wang 已提交
58 59 60 61 62 63
    {"CAST",                 TK_CAST},
    {"CLIENT_VERSION",       TK_CLIENT_VERSION},
    {"CLUSTER",              TK_CLUSTER},
    {"COLUMN",               TK_COLUMN},
    {"COMMENT",              TK_COMMENT},
    {"COMP",                 TK_COMP},
X
Xiaoyu Wang 已提交
64
    {"COMPACT",              TK_COMPACT},
X
Xiaoyu Wang 已提交
65 66
    {"CONNECTION",           TK_CONNECTION},
    {"CONNECTIONS",          TK_CONNECTIONS},
X
Xiaoyu Wang 已提交
67
    {"CONNS",                TK_CONNS},
X
Xiaoyu Wang 已提交
68 69 70 71 72 73 74 75 76 77
    {"CONSUMER",             TK_CONSUMER},
    {"CONSUMERS",            TK_CONSUMERS},
    {"CONTAINS",             TK_CONTAINS},
    {"COUNT",                TK_COUNT},
    {"CREATE",               TK_CREATE},
    {"CURRENT_USER",         TK_CURRENT_USER},
    {"DATABASE",             TK_DATABASE},
    {"DATABASES",            TK_DATABASES},
    {"DBS",                  TK_DBS},
    {"DELETE",               TK_DELETE},
78
    {"DELETE_MARK",          TK_DELETE_MARK},
X
Xiaoyu Wang 已提交
79 80 81 82 83 84 85 86 87
    {"DESC",                 TK_DESC},
    {"DESCRIBE",             TK_DESCRIBE},
    {"DISTINCT",             TK_DISTINCT},
    {"DISTRIBUTED",          TK_DISTRIBUTED},
    {"DNODE",                TK_DNODE},
    {"DNODES",               TK_DNODES},
    {"DOUBLE",               TK_DOUBLE},
    {"DROP",                 TK_DROP},
    {"DURATION",             TK_DURATION},
X
Xiaoyu Wang 已提交
88
    {"ELSE",                 TK_ELSE},
X
Xiaoyu Wang 已提交
89
    {"ENABLE",               TK_ENABLE},
X
Xiaoyu Wang 已提交
90
    {"END",                  TK_END},
X
Xiaoyu Wang 已提交
91 92 93
    {"EXISTS",               TK_EXISTS},
    {"EXPIRED",              TK_EXPIRED},
    {"EXPLAIN",              TK_EXPLAIN},
X
Xiaoyu Wang 已提交
94
    {"EVENT_WINDOW",         TK_EVENT_WINDOW},
X
Xiaoyu Wang 已提交
95 96 97
    {"EVERY",                TK_EVERY},
    {"FILE",                 TK_FILE},
    {"FILL",                 TK_FILL},
98
    {"FILL_HISTORY",         TK_FILL_HISTORY},
X
Xiaoyu Wang 已提交
99 100 101 102
    {"FIRST",                TK_FIRST},
    {"FLOAT",                TK_FLOAT},
    {"FLUSH",                TK_FLUSH},
    {"FROM",                 TK_FROM},
103
    {"FORCE",                TK_FORCE},
X
Xiaoyu Wang 已提交
104 105
    {"FUNCTION",             TK_FUNCTION},
    {"FUNCTIONS",            TK_FUNCTIONS},
D
Dingle Zhang 已提交
106
    {"GEOMETRY",             TK_GEOMETRY},
X
Xiaoyu Wang 已提交
107 108 109 110 111 112 113 114 115 116 117 118
    {"GRANT",                TK_GRANT},
    {"GRANTS",               TK_GRANTS},
    {"GROUP",                TK_GROUP},
    {"HAVING",               TK_HAVING},
    {"IF",                   TK_IF},
    {"IGNORE",               TK_IGNORE},
    {"IMPORT",               TK_IMPORT},
    {"IN",                   TK_IN},
    {"INDEX",                TK_INDEX},
    {"INDEXES",              TK_INDEXES},
    {"INNER",                TK_INNER},
    {"INSERT",               TK_INSERT},
X
Xiaoyu Wang 已提交
119
    {"INT",                  TK_INT},
X
Xiaoyu Wang 已提交
120 121 122 123 124 125 126 127
    {"INTEGER",              TK_INTEGER},
    {"INTERVAL",             TK_INTERVAL},
    {"INTO",                 TK_INTO},
    {"IS",                   TK_IS},
    {"JOIN",                 TK_JOIN},
    {"JSON",                 TK_JSON},
    {"KEEP",                 TK_KEEP},
    {"KILL",                 TK_KILL},
128
    {"LANGUAGE",             TK_LANGUAGE},
X
Xiaoyu Wang 已提交
129 130
    {"LAST",                 TK_LAST},
    {"LAST_ROW",             TK_LAST_ROW},
131
    {"LEADER",               TK_LEADER},
X
Xiaoyu Wang 已提交
132
    {"LICENCES",             TK_LICENCES},
X
Xiaoyu Wang 已提交
133 134 135 136 137 138 139
    {"LIKE",                 TK_LIKE},
    {"LIMIT",                TK_LIMIT},
    {"LINEAR",               TK_LINEAR},
    {"LOCAL",                TK_LOCAL},
    {"MATCH",                TK_MATCH},
    {"MAXROWS",              TK_MAXROWS},
    {"MAX_DELAY",            TK_MAX_DELAY},
140
    {"MAX_SPEED",            TK_MAX_SPEED},
X
Xiaoyu Wang 已提交
141 142
    {"MERGE",                TK_MERGE},
    {"META",                 TK_META},
wmmhello's avatar
wmmhello 已提交
143
    {"ONLY",                 TK_ONLY},
X
Xiaoyu Wang 已提交
144 145 146 147 148 149 150 151 152 153 154 155 156
    {"MINROWS",              TK_MINROWS},
    {"MINUS",                TK_MINUS},
    {"MNODE",                TK_MNODE},
    {"MNODES",               TK_MNODES},
    {"MODIFY",               TK_MODIFY},
    {"MODULES",              TK_MODULES},
    {"NCHAR",                TK_NCHAR},
    {"NEXT",                 TK_NEXT},
    {"NMATCH",               TK_NMATCH},
    {"NONE",                 TK_NONE},
    {"NOT",                  TK_NOT},
    {"NOW",                  TK_NOW},
    {"NULL",                 TK_NULL},
D
dapan1121 已提交
157
    {"NULL_F",               TK_NULL_F},
X
Xiaoyu Wang 已提交
158 159 160 161 162 163 164 165
    {"NULLS",                TK_NULLS},
    {"OFFSET",               TK_OFFSET},
    {"ON",                   TK_ON},
    {"OR",                   TK_OR},
    {"ORDER",                TK_ORDER},
    {"OUTPUTTYPE",           TK_OUTPUTTYPE},
    {"PAGES",                TK_PAGES},
    {"PAGESIZE",             TK_PAGESIZE},
X
Xiaoyu Wang 已提交
166 167
    {"PARTITION",            TK_PARTITION},
    {"PASS",                 TK_PASS},
X
Xiaoyu Wang 已提交
168 169 170 171
    {"PORT",                 TK_PORT},
    {"PPS",                  TK_PPS},
    {"PRECISION",            TK_PRECISION},
    {"PREV",                 TK_PREV},
172
    {"PRIVILEGES",           TK_PRIVILEGES},
X
Xiaoyu Wang 已提交
173 174 175 176 177 178 179
    {"QNODE",                TK_QNODE},
    {"QNODES",               TK_QNODES},
    {"QTIME",                TK_QTIME},
    {"QUERIES",              TK_QUERIES},
    {"QUERY",                TK_QUERY},
    {"RANGE",                TK_RANGE},
    {"RATIO",                TK_RATIO},
180
    {"PAUSE",                TK_PAUSE},
X
Xiaoyu Wang 已提交
181 182 183
    {"READ",                 TK_READ},
    {"REDISTRIBUTE",         TK_REDISTRIBUTE},
    {"RENAME",               TK_RENAME},
184
    {"REPLACE",              TK_REPLACE},
X
Xiaoyu Wang 已提交
185 186
    {"REPLICA",              TK_REPLICA},
    {"RESET",                TK_RESET},
187
    {"RESUME",               TK_RESUME},
D
dapan1121 已提交
188
    {"RESTORE",              TK_RESTORE},
X
Xiaoyu Wang 已提交
189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210
    {"RETENTIONS",           TK_RETENTIONS},
    {"REVOKE",               TK_REVOKE},
    {"ROLLUP",               TK_ROLLUP},
    {"SCHEMALESS",           TK_SCHEMALESS},
    {"SCORES",               TK_SCORES},
    {"SELECT",               TK_SELECT},
    {"SERVER_STATUS",        TK_SERVER_STATUS},
    {"SERVER_VERSION",       TK_SERVER_VERSION},
    {"SESSION",              TK_SESSION},
    {"SET",                  TK_SET},
    {"SHOW",                 TK_SHOW},
    {"SINGLE_STABLE",        TK_SINGLE_STABLE},
    {"SLIDING",              TK_SLIDING},
    {"SLIMIT",               TK_SLIMIT},
    {"SMA",                  TK_SMA},
    {"SMALLINT",             TK_SMALLINT},
    {"SNODE",                TK_SNODE},
    {"SNODES",               TK_SNODES},
    {"SOFFSET",              TK_SOFFSET},
    {"SPLIT",                TK_SPLIT},
    {"STABLE",               TK_STABLE},
    {"STABLES",              TK_STABLES},
X
Xiaoyu Wang 已提交
211
    {"START",                TK_START},
X
Xiaoyu Wang 已提交
212 213 214 215 216 217
    {"STATE",                TK_STATE},
    {"STATE_WINDOW",         TK_STATE_WINDOW},
    {"STORAGE",              TK_STORAGE},
    {"STREAM",               TK_STREAM},
    {"STREAMS",              TK_STREAMS},
    {"STRICT",               TK_STRICT},
X
Xiaoyu Wang 已提交
218
    {"STT_TRIGGER",          TK_STT_TRIGGER},
219
    {"SUBSCRIBE",            TK_SUBSCRIBE},
X
Xiaoyu Wang 已提交
220
    {"SUBSCRIPTIONS",        TK_SUBSCRIPTIONS},
221
    {"SUBTABLE",             TK_SUBTABLE},
X
Xiaoyu Wang 已提交
222 223 224
    {"SYSINFO",              TK_SYSINFO},
    {"TABLE",                TK_TABLE},
    {"TABLES",               TK_TABLES},
225 226
    {"TABLE_PREFIX",         TK_TABLE_PREFIX},
    {"TABLE_SUFFIX",         TK_TABLE_SUFFIX},
X
Xiaoyu Wang 已提交
227 228 229
    {"TAG",                  TK_TAG},
    {"TAGS",                 TK_TAGS},
    {"TBNAME",               TK_TBNAME},
X
Xiaoyu Wang 已提交
230
    {"THEN",                 TK_THEN},
X
Xiaoyu Wang 已提交
231 232 233 234 235 236 237 238 239 240 241
    {"TIMESTAMP",            TK_TIMESTAMP},
    {"TIMEZONE",             TK_TIMEZONE},
    {"TINYINT",              TK_TINYINT},
    {"TO",                   TK_TO},
    {"TODAY",                TK_TODAY},
    {"TOPIC",                TK_TOPIC},
    {"TOPICS",               TK_TOPICS},
    {"TRANSACTION",          TK_TRANSACTION},
    {"TRANSACTIONS",         TK_TRANSACTIONS},
    {"TRIGGER",              TK_TRIGGER},
    {"TRIM",                 TK_TRIM},
242
    {"TSDB_PAGESIZE",        TK_TSDB_PAGESIZE},
X
Xiaoyu Wang 已提交
243 244 245
    {"TSERIES",              TK_TSERIES},
    {"TTL",                  TK_TTL},
    {"UNION",                TK_UNION},
D
dapan1121 已提交
246
    {"UNSAFE",               TK_UNSAFE},
X
Xiaoyu Wang 已提交
247
    {"UNSIGNED",             TK_UNSIGNED},
248
    {"UNTREATED",            TK_UNTREATED},
249
    {"UPDATE",               TK_UPDATE},
X
Xiaoyu Wang 已提交
250 251 252 253 254
    {"USE",                  TK_USE},
    {"USER",                 TK_USER},
    {"USERS",                TK_USERS},
    {"USING",                TK_USING},
    {"VALUE",                TK_VALUE},
D
dapan1121 已提交
255
    {"VALUE_F",              TK_VALUE_F},
X
Xiaoyu Wang 已提交
256 257 258 259 260 261
    {"VALUES",               TK_VALUES},
    {"VARCHAR",              TK_VARCHAR},
    {"VARIABLES",            TK_VARIABLES},
    {"VERBOSE",              TK_VERBOSE},
    {"VGROUP",               TK_VGROUP},
    {"VGROUPS",              TK_VGROUPS},
D
dapan1121 已提交
262
    {"VNODE",                TK_VNODE},
263
    {"VNODES",               TK_VNODES},
X
Xiaoyu Wang 已提交
264 265
    {"WAL_FSYNC_PERIOD",     TK_WAL_FSYNC_PERIOD},
    {"WAL_LEVEL",            TK_WAL_LEVEL},
X
Xiaoyu Wang 已提交
266 267 268 269
    {"WAL_RETENTION_PERIOD", TK_WAL_RETENTION_PERIOD},
    {"WAL_RETENTION_SIZE",   TK_WAL_RETENTION_SIZE},
    {"WAL_ROLL_PERIOD",      TK_WAL_ROLL_PERIOD},
    {"WAL_SEGMENT_SIZE",     TK_WAL_SEGMENT_SIZE},
X
Xiaoyu Wang 已提交
270
    {"WATERMARK",            TK_WATERMARK},
X
Xiaoyu Wang 已提交
271
    {"WHEN",                 TK_WHEN},
X
Xiaoyu Wang 已提交
272 273 274 275 276
    {"WHERE",                TK_WHERE},
    {"WINDOW_CLOSE",         TK_WINDOW_CLOSE},
    {"WITH",                 TK_WITH},
    {"WRITE",                TK_WRITE},
    {"_C0",                  TK_ROWTS},
277
    {"_IROWTS",              TK_IROWTS},
278
    {"_ISFILLED",            TK_ISFILLED},
X
Xiaoyu Wang 已提交
279 280 281 282
    {"_QDURATION",           TK_QDURATION},
    {"_QEND",                TK_QEND},
    {"_QSTART",              TK_QSTART},
    {"_ROWTS",               TK_ROWTS},
283
    {"_TAGS",                TK_QTAGS},
X
Xiaoyu Wang 已提交
284 285 286
    {"_WDURATION",           TK_WDURATION},
    {"_WEND",                TK_WEND},
    {"_WSTART",              TK_WSTART},
287
    {"ALIVE",                TK_ALIVE},
H
hzcheng 已提交
288
};
X
Xiaoyu Wang 已提交
289
// clang-format on
H
hzcheng 已提交
290 291 292 293 294 295 296 297 298 299 300 301 302

/*
 * Lookup table: isIdChar[c] is 1 when byte c may appear in an unquoted
 * identifier (ASCII digits, letters and '_'), 0 otherwise.
 *
 * The table is sized for every possible uint8_t index. Only the ASCII half is
 * spelled out; entries 0x80..0xFF are implicitly zero-initialized, so indexing
 * with a non-ASCII byte is in bounds and yields "not an identifier character".
 */
static const char isIdChar[256] = {
    /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
};

H
Haojun Liao 已提交
303
static void* keywordHashTable = NULL;
H
hzcheng 已提交
304

S
TD-1057  
Shengliang Guan 已提交
305
static void doInitKeywordsTable(void) {
306
  int numOfEntries = tListLen(keywordTable);
X
Xiaoyu Wang 已提交
307

H
Haojun Liao 已提交
308
  keywordHashTable = taosHashInit(numOfEntries, MurmurHash3_32, true, false);
309
  for (int32_t i = 0; i < numOfEntries; i++) {
S
TD-1057  
Shengliang Guan 已提交
310
    keywordTable[i].len = (uint8_t)strlen(keywordTable[i].name);
311
    void* ptr = &keywordTable[i];
H
Haojun Liao 已提交
312
    taosHashPut(keywordHashTable, keywordTable[i].name, keywordTable[i].len, (void*)&ptr, POINTER_BYTES);
H
hzcheng 已提交
313
  }
314 315
}

wafwerar's avatar
wafwerar 已提交
316
static TdThreadOnce keywordsHashTableInit = PTHREAD_ONCE_INIT;
317

318
static int32_t tKeywordCode(const char* z, int n) {
wafwerar's avatar
wafwerar 已提交
319
  taosThreadOnce(&keywordsHashTableInit, doInitKeywordsTable);
X
Xiaoyu Wang 已提交
320

H
hjxilinx 已提交
321
  char key[512] = {0};
X
Xiaoyu Wang 已提交
322
  if (n > tListLen(key)) {  // too long token, can not be any other token type
323
    return TK_NK_ID;
324
  }
X
Xiaoyu Wang 已提交
325

H
hzcheng 已提交
326 327
  for (int32_t j = 0; j < n; ++j) {
    if (z[j] >= 'a' && z[j] <= 'z') {
328
      key[j] = (char)(z[j] & 0xDF);  // to uppercase and set the null-terminated
H
hzcheng 已提交
329 330 331 332 333
    } else {
      key[j] = z[j];
    }
  }

D
dapan1121 已提交
334
  if (keywordHashTable == NULL) {
335
    return TK_NK_ILLEGAL;
D
dapan1121 已提交
336
  }
H
Haojun Liao 已提交
337

H
Haojun Liao 已提交
338
  SKeyword** pKey = (SKeyword**)taosHashGet(keywordHashTable, key, n);
X
Xiaoyu Wang 已提交
339
  return (pKey != NULL) ? (*pKey)->type : TK_NK_ID;
H
hzcheng 已提交
340 341
}

H
huili 已提交
342
/*
343 344 345
 * Return the length of the token that begins at z[0].
 * Store the token type in *type before returning.
 */
346
uint32_t tGetToken(const char* z, uint32_t* tokenId) {
347
  uint32_t i;
H
hzcheng 已提交
348 349 350 351 352 353 354 355
  switch (*z) {
    case ' ':
    case '\t':
    case '\n':
    case '\f':
    case '\r': {
      for (i = 1; isspace(z[i]); i++) {
      }
356
      *tokenId = TK_NK_SPACE;
H
hzcheng 已提交
357 358 359
      return i;
    }
    case ':': {
360
      *tokenId = TK_NK_COLON;
H
hzcheng 已提交
361 362 363 364 365 366
      return 1;
    }
    case '-': {
      if (z[1] == '-') {
        for (i = 2; z[i] && z[i] != '\n'; i++) {
        }
367
        *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
368
        return i;
369 370 371
      } else if (z[1] == '>') {
        *tokenId = TK_NK_ARROW;
        return 2;
H
hzcheng 已提交
372
      }
X
Xiaoyu Wang 已提交
373
      *tokenId = TK_NK_MINUS;
H
hzcheng 已提交
374 375 376
      return 1;
    }
    case '(': {
377
      *tokenId = TK_NK_LP;
H
hzcheng 已提交
378 379 380
      return 1;
    }
    case ')': {
381
      *tokenId = TK_NK_RP;
H
hzcheng 已提交
382 383 384
      return 1;
    }
    case ';': {
385
      *tokenId = TK_NK_SEMI;
H
hzcheng 已提交
386 387 388
      return 1;
    }
    case '+': {
389
      *tokenId = TK_NK_PLUS;
H
hzcheng 已提交
390 391 392
      return 1;
    }
    case '*': {
393
      *tokenId = TK_NK_STAR;
H
hzcheng 已提交
394 395 396 397
      return 1;
    }
    case '/': {
      if (z[1] != '*' || z[2] == 0) {
398
        *tokenId = TK_NK_SLASH;
H
hzcheng 已提交
399 400 401 402 403
        return 1;
      }
      for (i = 3; z[i] && (z[i] != '/' || z[i - 1] != '*'); i++) {
      }
      if (z[i]) i++;
404
      *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
405 406 407
      return i;
    }
    case '%': {
408
      *tokenId = TK_NK_REM;
H
hzcheng 已提交
409 410 411
      return 1;
    }
    case '=': {
412
      *tokenId = TK_NK_EQ;
H
hzcheng 已提交
413 414 415 416
      return 1 + (z[1] == '=');
    }
    case '<': {
      if (z[1] == '=') {
417
        *tokenId = TK_NK_LE;
H
hzcheng 已提交
418 419
        return 2;
      } else if (z[1] == '>') {
420
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
421 422
        return 2;
      } else if (z[1] == '<') {
423
        *tokenId = TK_NK_LSHIFT;
H
hzcheng 已提交
424 425
        return 2;
      } else {
426
        *tokenId = TK_NK_LT;
H
hzcheng 已提交
427 428 429 430 431
        return 1;
      }
    }
    case '>': {
      if (z[1] == '=') {
432
        *tokenId = TK_NK_GE;
H
hzcheng 已提交
433 434
        return 2;
      } else if (z[1] == '>') {
435
        *tokenId = TK_NK_RSHIFT;
H
hzcheng 已提交
436 437
        return 2;
      } else {
438
        *tokenId = TK_NK_GT;
H
hzcheng 已提交
439 440 441 442 443
        return 1;
      }
    }
    case '!': {
      if (z[1] != '=') {
444
        *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
445 446
        return 2;
      } else {
447
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
448 449 450 451 452
        return 2;
      }
    }
    case '|': {
      if (z[1] != '|') {
453
        *tokenId = TK_NK_BITOR;
H
hzcheng 已提交
454 455
        return 1;
      } else {
456
        *tokenId = TK_NK_CONCAT;
H
hzcheng 已提交
457 458 459 460
        return 2;
      }
    }
    case ',': {
461
      *tokenId = TK_NK_COMMA;
H
hzcheng 已提交
462 463 464
      return 1;
    }
    case '&': {
465
      *tokenId = TK_NK_BITAND;
H
hzcheng 已提交
466 467 468
      return 1;
    }
    case '~': {
469
      *tokenId = TK_NK_BITNOT;
H
hzcheng 已提交
470 471
      return 1;
    }
S
slguan 已提交
472
    case '?': {
473
      *tokenId = TK_NK_QUESTION;
S
slguan 已提交
474 475
      return 1;
    }
476
    case '`':
H
hzcheng 已提交
477 478
    case '\'':
    case '"': {
S
slguan 已提交
479 480
      int  delim = z[0];
      bool strEnd = false;
H
hzcheng 已提交
481
      for (i = 1; z[i]; i++) {
482
        if (delim != '`' && z[i] == '\\') {  // ignore the escaped character that follows this backslash
L
[1292]  
lihui 已提交
483 484 485
          i++;
          continue;
        }
X
Xiaoyu Wang 已提交
486

487
        if (z[i] == delim) {
H
hzcheng 已提交
488 489 490
          if (z[i + 1] == delim) {
            i++;
          } else {
H
huili 已提交
491
            strEnd = true;
H
hzcheng 已提交
492 493 494 495
            break;
          }
        }
      }
X
Xiaoyu Wang 已提交
496

H
hzcheng 已提交
497
      if (z[i]) i++;
H
huili 已提交
498

S
slguan 已提交
499
      if (strEnd) {
X
Xiaoyu Wang 已提交
500
        *tokenId = (delim == '`') ? TK_NK_ID : TK_NK_STRING;
S
slguan 已提交
501 502
        return i;
      }
H
huili 已提交
503

S
slguan 已提交
504
      break;
H
hzcheng 已提交
505 506
    }
    case '.': {
S
slguan 已提交
507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523
      /*
       * handle the the float number with out integer part
       * .123
       * .123e4
       */
      if (isdigit(z[1])) {
        for (i = 2; isdigit(z[i]); i++) {
        }

        if ((z[i] == 'e' || z[i] == 'E') &&
            (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
          i += 2;
          while (isdigit(z[i])) {
            i++;
          }
        }

524
        *tokenId = TK_NK_FLOAT;
S
slguan 已提交
525 526
        return i;
      } else {
527
        *tokenId = TK_NK_DOT;
S
slguan 已提交
528 529 530 531 532 533 534
        return 1;
      }
    }

    case '0': {
      char next = z[1];

X
Xiaoyu Wang 已提交
535
      if (next == 'b') {  // bin number
536
        *tokenId = TK_NK_BIN;
S
slguan 已提交
537 538 539 540 541 542 543 544
        for (i = 2; (z[i] == '0' || z[i] == '1'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
X
Xiaoyu Wang 已提交
545
      } else if (next == 'x') {  // hex number
546
        *tokenId = TK_NK_HEX;
S
slguan 已提交
547 548 549 550 551 552 553 554 555
        for (i = 2; isdigit(z[i]) || (z[i] >= 'a' && z[i] <= 'f') || (z[i] >= 'A' && z[i] <= 'F'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
      }
H
hzcheng 已提交
556 557 558 559 560 561 562 563 564 565
    }
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9': {
566
      *tokenId = TK_NK_INTEGER;
H
hzcheng 已提交
567 568 569
      for (i = 1; isdigit(z[i]); i++) {
      }

H
Haojun Liao 已提交
570
      /* here is the 1u/1a/2s/3m/9y */
X
Xiaoyu Wang 已提交
571 572 573
      if ((z[i] == 'b' || z[i] == 'u' || z[i] == 'a' || z[i] == 's' || z[i] == 'm' || z[i] == 'h' || z[i] == 'd' ||
           z[i] == 'n' || z[i] == 'y' || z[i] == 'w' || z[i] == 'B' || z[i] == 'U' || z[i] == 'A' || z[i] == 'S' ||
           z[i] == 'M' || z[i] == 'H' || z[i] == 'D' || z[i] == 'N' || z[i] == 'Y' || z[i] == 'W') &&
H
hjxilinx 已提交
574
          (isIdChar[(uint8_t)z[i + 1]] == 0)) {
575
        *tokenId = TK_NK_VARIABLE;
H
hzcheng 已提交
576 577 578 579 580 581 582 583 584 585
        i += 1;
        return i;
      }

      int32_t seg = 1;
      while (z[i] == '.' && isdigit(z[i + 1])) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
586
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
587 588 589 590
        seg++;
      }

      if (seg == 4) {  // ip address
591
        *tokenId = TK_NK_IPTOKEN;
H
hzcheng 已提交
592
        return i;
X
Xiaoyu Wang 已提交
593 594
      } else if (seg > 2) {
        break;
H
hzcheng 已提交
595 596 597 598 599 600 601 602
      }

      if ((z[i] == 'e' || z[i] == 'E') &&
          (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
603
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
604 605 606
      }
      return i;
    }
X
Xiaoyu Wang 已提交
607 608 609 610 611 612
    // case '[': {
    //   for (i = 1; z[i] && z[i - 1] != ']'; i++) {
    //   }
    //   *tokenId = TK_NK_ID;
    //   return i;
    // }
H
hzcheng 已提交
613 614 615 616
    case 'T':
    case 't':
    case 'F':
    case 'f': {
X
Xiaoyu Wang 已提交
617
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t)z[i]]; i++) {
H
hzcheng 已提交
618 619 620
      }

      if ((i == 4 && strncasecmp(z, "true", 4) == 0) || (i == 5 && strncasecmp(z, "false", 5) == 0)) {
621
        *tokenId = TK_NK_BOOL;
H
hzcheng 已提交
622 623
        return i;
      }
X
Xiaoyu Wang 已提交
624 625
      *tokenId = tKeywordCode(z, i);
      return i;
H
hzcheng 已提交
626 627
    }
    default: {
X
Xiaoyu Wang 已提交
628
      if (((*z & 0x80) != 0) || !isIdChar[(uint8_t)*z]) {
H
hzcheng 已提交
629 630
        break;
      }
X
Xiaoyu Wang 已提交
631
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t)z[i]]; i++) {
H
hzcheng 已提交
632
      }
633
      *tokenId = tKeywordCode(z, i);
H
hzcheng 已提交
634 635 636 637
      return i;
    }
  }

638
  *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
639 640 641
  return 0;
}

X
Xiaoyu Wang 已提交
642
SToken tStrGetToken(const char* str, int32_t* i, bool isPrevOptr, bool* pIgnoreComma) {
H
Haojun Liao 已提交
643
  SToken t0 = {0};
S
slguan 已提交
644

H
hzcheng 已提交
645 646
  // here we reach the end of sql string, null-terminated string
  if (str[*i] == 0) {
S
slguan 已提交
647 648
    t0.n = 0;
    return t0;
H
hzcheng 已提交
649 650
  }

651
  // IGNORE TK_NK_SPACE, TK_NK_COMMA, and specified tokens
S
slguan 已提交
652 653 654
  while (1) {
    *i += t0.n;

H
Haojun Liao 已提交
655
    int32_t numOfComma = 0;
X
Xiaoyu Wang 已提交
656
    char    t = str[*i];
H
Haojun Liao 已提交
657 658 659 660
    while (t == ' ' || t == '\n' || t == '\r' || t == '\t' || t == '\f' || t == ',') {
      if (t == ',' && (++numOfComma > 1)) {  // comma only allowed once
        t0.n = 0;
        return t0;
S
slguan 已提交
661
      }
X
Xiaoyu Wang 已提交
662

X
Xiaoyu Wang 已提交
663 664 665 666
      if (NULL != pIgnoreComma && t == ',') {
        *pIgnoreComma = true;
      }

H
Haojun Liao 已提交
667
      t = str[++(*i)];
S
slguan 已提交
668
    }
H
hzcheng 已提交
669

670
    t0.n = tGetToken(&str[*i], &t0.type);
H
Haojun Liao 已提交
671
    break;
S
slguan 已提交
672

H
Haojun Liao 已提交
673 674
    // not support user specfied ignored symbol list
#if 0
H
Haojun Liao 已提交
675
    bool ignore = false;
S
slguan 已提交
676 677
    for (uint32_t k = 0; k < numOfIgnoreToken; k++) {
      if (t0.type == ignoreTokenTypes[k]) {
H
Haojun Liao 已提交
678
        ignore = true;
S
slguan 已提交
679 680 681 682
        break;
      }
    }

H
Haojun Liao 已提交
683
    if (!ignore) {
S
slguan 已提交
684 685
      break;
    }
H
Haojun Liao 已提交
686
#endif
H
hzcheng 已提交
687 688
  }

689
  if (t0.type == TK_NK_SEMI) {
S
slguan 已提交
690
    t0.n = 0;
D
dapan1121 已提交
691
    t0.type = 0;
S
slguan 已提交
692 693 694 695 696 697 698 699
    return t0;
  }

  uint32_t type = 0;
  int32_t  len;

  // support parse the 'db.tbl' format, notes: There should be no space on either side of the dot!
  if ('.' == str[*i + t0.n]) {
700
    len = tGetToken(&str[*i + t0.n + 1], &type);
S
slguan 已提交
701 702

    // only id and string are valid
X
Xiaoyu Wang 已提交
703
    if (((TK_NK_STRING != t0.type) && (TK_NK_ID != t0.type)) || ((TK_NK_STRING != type) && (TK_NK_ID != type))) {
704
      t0.type = TK_NK_ILLEGAL;
S
slguan 已提交
705 706 707 708 709 710 711 712 713
      t0.n = 0;

      return t0;
    }

    t0.n += len + 1;

  } else {
    // support parse the -/+number format
X
Xiaoyu Wang 已提交
714
    if ((isPrevOptr) && (t0.type == TK_NK_MINUS || t0.type == TK_NK_PLUS)) {
715
      len = tGetToken(&str[*i + t0.n], &type);
716
      if (type == TK_NK_INTEGER || type == TK_NK_FLOAT) {
S
slguan 已提交
717 718 719
        t0.type = type;
        t0.n += len;
      }
H
hzcheng 已提交
720 721 722
    }
  }

X
Xiaoyu Wang 已提交
723
  t0.z = (char*)str + (*i);
S
slguan 已提交
724 725 726
  *i += t0.n;

  return t0;
H
hzcheng 已提交
727 728
}

X
Xiaoyu Wang 已提交
729
// Returns true when tKeywordCode() classifies the len-byte token at z as
// anything other than a plain identifier (TK_NK_ID).
bool taosIsKeyWordToken(const char* z, int32_t len) {
  int32_t code = tKeywordCode(z, len);
  return code != TK_NK_ID;
}
H
Haojun Liao 已提交
730 731

// Release the keyword hash table. The global pointer is swapped to 0 with an
// atomic compare-and-exchange first, and the table is freed only by the
// caller whose exchange succeeded.
void taosCleanupKeywordsTable() {
  void* table = keywordHashTable;
  if (table == NULL) {
    return;
  }

  if (atomic_val_compare_exchange_ptr(&keywordHashTable, table, 0) != table) {
    return;
  }

  taosHashCleanup(table);
}