parTokenizer.c 21.2 KB
Newer Older
H
hzcheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

H
hjxilinx 已提交
16
#include "os.h"
X
Xiaoyu Wang 已提交
17
#include "parToken.h"
H
hjxilinx 已提交
18
#include "taosdef.h"
X
Xiaoyu Wang 已提交
19
#include "thash.h"
20
#include "ttokendef.h"
H
hzcheng 已提交
21

S
slguan 已提交
22
// All the keywords of the SQL language are stored in a hash table
H
hzcheng 已提交
23
typedef struct SKeyword {
  const char* name;  // keyword text as written in keywordTable (upper case)
  uint16_t    type;  // TK_* token code reported for this keyword
  uint8_t     len;   // strlen(name); not set statically, filled in by doInitKeywordsTable()
} SKeyword;

X
Xiaoyu Wang 已提交
29
// clang-format off
S
slguan 已提交
30
// keywords in sql string
H
hzcheng 已提交
31
static SKeyword keywordTable[] = {
X
Xiaoyu Wang 已提交
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
    {"ACCOUNT",              TK_ACCOUNT},
    {"ACCOUNTS",             TK_ACCOUNTS},
    {"ADD",                  TK_ADD},
    {"AGGREGATE",            TK_AGGREGATE},
    {"ALL",                  TK_ALL},
    {"ALTER",                TK_ALTER},
    {"ANALYZE",              TK_ANALYZE},
    {"AND",                  TK_AND},
    {"APPS",                 TK_APPS},
    {"AS",                   TK_AS},
    {"ASC",                  TK_ASC},
    {"AT_ONCE",              TK_AT_ONCE},
    {"BALANCE",              TK_BALANCE},
    {"BETWEEN",              TK_BETWEEN},
    {"BIGINT",               TK_BIGINT},
X
Xiaoyu Wang 已提交
47
    {"BINARY",               TK_BINARY},
X
Xiaoyu Wang 已提交
48 49 50 51 52 53 54 55 56
    {"BNODE",                TK_BNODE},
    {"BNODES",               TK_BNODES},
    {"BOOL",                 TK_BOOL},
    {"BUFFER",               TK_BUFFER},
    {"BUFSIZE",              TK_BUFSIZE},
    {"BY",                   TK_BY},
    {"CACHE",                TK_CACHE},
    {"CACHEMODEL",           TK_CACHEMODEL},
    {"CACHESIZE",            TK_CACHESIZE},
X
Xiaoyu Wang 已提交
57
    {"CASE",                 TK_CASE},
X
Xiaoyu Wang 已提交
58 59 60 61 62 63
    {"CAST",                 TK_CAST},
    {"CLIENT_VERSION",       TK_CLIENT_VERSION},
    {"CLUSTER",              TK_CLUSTER},
    {"COLUMN",               TK_COLUMN},
    {"COMMENT",              TK_COMMENT},
    {"COMP",                 TK_COMP},
X
Xiaoyu Wang 已提交
64
    {"COMPACT",              TK_COMPACT},
X
Xiaoyu Wang 已提交
65 66
    {"CONNECTION",           TK_CONNECTION},
    {"CONNECTIONS",          TK_CONNECTIONS},
X
Xiaoyu Wang 已提交
67
    {"CONNS",                TK_CONNS},
X
Xiaoyu Wang 已提交
68 69 70 71 72 73 74 75 76 77
    {"CONSUMER",             TK_CONSUMER},
    {"CONSUMERS",            TK_CONSUMERS},
    {"CONTAINS",             TK_CONTAINS},
    {"COUNT",                TK_COUNT},
    {"CREATE",               TK_CREATE},
    {"CURRENT_USER",         TK_CURRENT_USER},
    {"DATABASE",             TK_DATABASE},
    {"DATABASES",            TK_DATABASES},
    {"DBS",                  TK_DBS},
    {"DELETE",               TK_DELETE},
78
    {"DELETE_MARK",          TK_DELETE_MARK},
X
Xiaoyu Wang 已提交
79 80 81 82 83 84 85 86 87
    {"DESC",                 TK_DESC},
    {"DESCRIBE",             TK_DESCRIBE},
    {"DISTINCT",             TK_DISTINCT},
    {"DISTRIBUTED",          TK_DISTRIBUTED},
    {"DNODE",                TK_DNODE},
    {"DNODES",               TK_DNODES},
    {"DOUBLE",               TK_DOUBLE},
    {"DROP",                 TK_DROP},
    {"DURATION",             TK_DURATION},
X
Xiaoyu Wang 已提交
88
    {"ELSE",                 TK_ELSE},
X
Xiaoyu Wang 已提交
89
    {"ENABLE",               TK_ENABLE},
X
Xiaoyu Wang 已提交
90
    {"END",                  TK_END},
X
Xiaoyu Wang 已提交
91 92 93
    {"EXISTS",               TK_EXISTS},
    {"EXPIRED",              TK_EXPIRED},
    {"EXPLAIN",              TK_EXPLAIN},
X
Xiaoyu Wang 已提交
94
    {"EVENT_WINDOW",         TK_EVENT_WINDOW},
X
Xiaoyu Wang 已提交
95 96 97
    {"EVERY",                TK_EVERY},
    {"FILE",                 TK_FILE},
    {"FILL",                 TK_FILL},
98
    {"FILL_HISTORY",         TK_FILL_HISTORY},
X
Xiaoyu Wang 已提交
99 100 101 102
    {"FIRST",                TK_FIRST},
    {"FLOAT",                TK_FLOAT},
    {"FLUSH",                TK_FLUSH},
    {"FROM",                 TK_FROM},
103
    {"FORCE",                TK_FORCE},
X
Xiaoyu Wang 已提交
104 105 106 107 108 109 110 111 112 113 114 115 116 117
    {"FUNCTION",             TK_FUNCTION},
    {"FUNCTIONS",            TK_FUNCTIONS},
    {"GRANT",                TK_GRANT},
    {"GRANTS",               TK_GRANTS},
    {"GROUP",                TK_GROUP},
    {"HAVING",               TK_HAVING},
    {"IF",                   TK_IF},
    {"IGNORE",               TK_IGNORE},
    {"IMPORT",               TK_IMPORT},
    {"IN",                   TK_IN},
    {"INDEX",                TK_INDEX},
    {"INDEXES",              TK_INDEXES},
    {"INNER",                TK_INNER},
    {"INSERT",               TK_INSERT},
X
Xiaoyu Wang 已提交
118
    {"INT",                  TK_INT},
X
Xiaoyu Wang 已提交
119 120 121 122 123 124 125 126
    {"INTEGER",              TK_INTEGER},
    {"INTERVAL",             TK_INTERVAL},
    {"INTO",                 TK_INTO},
    {"IS",                   TK_IS},
    {"JOIN",                 TK_JOIN},
    {"JSON",                 TK_JSON},
    {"KEEP",                 TK_KEEP},
    {"KILL",                 TK_KILL},
127
    {"LANGUAGE",             TK_LANGUAGE},
X
Xiaoyu Wang 已提交
128 129
    {"LAST",                 TK_LAST},
    {"LAST_ROW",             TK_LAST_ROW},
130
    {"LEADER",               TK_LEADER},
X
Xiaoyu Wang 已提交
131
    {"LICENCES",             TK_LICENCES},
X
Xiaoyu Wang 已提交
132 133 134 135 136 137 138
    {"LIKE",                 TK_LIKE},
    {"LIMIT",                TK_LIMIT},
    {"LINEAR",               TK_LINEAR},
    {"LOCAL",                TK_LOCAL},
    {"MATCH",                TK_MATCH},
    {"MAXROWS",              TK_MAXROWS},
    {"MAX_DELAY",            TK_MAX_DELAY},
139
    {"MAX_SPEED",            TK_MAX_SPEED},
X
Xiaoyu Wang 已提交
140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
    {"MERGE",                TK_MERGE},
    {"META",                 TK_META},
    {"MINROWS",              TK_MINROWS},
    {"MINUS",                TK_MINUS},
    {"MNODE",                TK_MNODE},
    {"MNODES",               TK_MNODES},
    {"MODIFY",               TK_MODIFY},
    {"MODULES",              TK_MODULES},
    {"NCHAR",                TK_NCHAR},
    {"NEXT",                 TK_NEXT},
    {"NMATCH",               TK_NMATCH},
    {"NONE",                 TK_NONE},
    {"NOT",                  TK_NOT},
    {"NOW",                  TK_NOW},
    {"NULL",                 TK_NULL},
D
dapan1121 已提交
155
    {"NULL_F",               TK_NULL_F},
X
Xiaoyu Wang 已提交
156 157 158 159 160 161 162 163
    {"NULLS",                TK_NULLS},
    {"OFFSET",               TK_OFFSET},
    {"ON",                   TK_ON},
    {"OR",                   TK_OR},
    {"ORDER",                TK_ORDER},
    {"OUTPUTTYPE",           TK_OUTPUTTYPE},
    {"PAGES",                TK_PAGES},
    {"PAGESIZE",             TK_PAGESIZE},
X
Xiaoyu Wang 已提交
164 165
    {"PARTITION",            TK_PARTITION},
    {"PASS",                 TK_PASS},
X
Xiaoyu Wang 已提交
166 167 168 169
    {"PORT",                 TK_PORT},
    {"PPS",                  TK_PPS},
    {"PRECISION",            TK_PRECISION},
    {"PREV",                 TK_PREV},
170
    {"PRIVILEGES",           TK_PRIVILEGES},
X
Xiaoyu Wang 已提交
171 172 173 174 175 176 177 178 179 180
    {"QNODE",                TK_QNODE},
    {"QNODES",               TK_QNODES},
    {"QTIME",                TK_QTIME},
    {"QUERIES",              TK_QUERIES},
    {"QUERY",                TK_QUERY},
    {"RANGE",                TK_RANGE},
    {"RATIO",                TK_RATIO},
    {"READ",                 TK_READ},
    {"REDISTRIBUTE",         TK_REDISTRIBUTE},
    {"RENAME",               TK_RENAME},
181
    {"REPLACE",              TK_REPLACE},
X
Xiaoyu Wang 已提交
182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205
    {"REPLICA",              TK_REPLICA},
    {"RESET",                TK_RESET},
    {"RETENTIONS",           TK_RETENTIONS},
    {"REVOKE",               TK_REVOKE},
    {"ROLLUP",               TK_ROLLUP},
    {"SCHEMALESS",           TK_SCHEMALESS},
    {"SCORES",               TK_SCORES},
    {"SELECT",               TK_SELECT},
    {"SERVER_STATUS",        TK_SERVER_STATUS},
    {"SERVER_VERSION",       TK_SERVER_VERSION},
    {"SESSION",              TK_SESSION},
    {"SET",                  TK_SET},
    {"SHOW",                 TK_SHOW},
    {"SINGLE_STABLE",        TK_SINGLE_STABLE},
    {"SLIDING",              TK_SLIDING},
    {"SLIMIT",               TK_SLIMIT},
    {"SMA",                  TK_SMA},
    {"SMALLINT",             TK_SMALLINT},
    {"SNODE",                TK_SNODE},
    {"SNODES",               TK_SNODES},
    {"SOFFSET",              TK_SOFFSET},
    {"SPLIT",                TK_SPLIT},
    {"STABLE",               TK_STABLE},
    {"STABLES",              TK_STABLES},
X
Xiaoyu Wang 已提交
206
    {"START",                TK_START},
X
Xiaoyu Wang 已提交
207 208 209 210 211 212
    {"STATE",                TK_STATE},
    {"STATE_WINDOW",         TK_STATE_WINDOW},
    {"STORAGE",              TK_STORAGE},
    {"STREAM",               TK_STREAM},
    {"STREAMS",              TK_STREAMS},
    {"STRICT",               TK_STRICT},
X
Xiaoyu Wang 已提交
213
    {"STT_TRIGGER",          TK_STT_TRIGGER},
214
    {"SUBSCRIBE",            TK_SUBSCRIBE},
X
Xiaoyu Wang 已提交
215
    {"SUBSCRIPTIONS",        TK_SUBSCRIPTIONS},
216
    {"SUBTABLE",             TK_SUBTABLE},
X
Xiaoyu Wang 已提交
217 218 219
    {"SYSINFO",              TK_SYSINFO},
    {"TABLE",                TK_TABLE},
    {"TABLES",               TK_TABLES},
220 221
    {"TABLE_PREFIX",         TK_TABLE_PREFIX},
    {"TABLE_SUFFIX",         TK_TABLE_SUFFIX},
X
Xiaoyu Wang 已提交
222 223 224
    {"TAG",                  TK_TAG},
    {"TAGS",                 TK_TAGS},
    {"TBNAME",               TK_TBNAME},
X
Xiaoyu Wang 已提交
225
    {"THEN",                 TK_THEN},
X
Xiaoyu Wang 已提交
226 227 228 229 230 231 232 233 234 235 236
    {"TIMESTAMP",            TK_TIMESTAMP},
    {"TIMEZONE",             TK_TIMEZONE},
    {"TINYINT",              TK_TINYINT},
    {"TO",                   TK_TO},
    {"TODAY",                TK_TODAY},
    {"TOPIC",                TK_TOPIC},
    {"TOPICS",               TK_TOPICS},
    {"TRANSACTION",          TK_TRANSACTION},
    {"TRANSACTIONS",         TK_TRANSACTIONS},
    {"TRIGGER",              TK_TRIGGER},
    {"TRIM",                 TK_TRIM},
237
    {"TSDB_PAGESIZE",        TK_TSDB_PAGESIZE},
X
Xiaoyu Wang 已提交
238 239 240 241
    {"TSERIES",              TK_TSERIES},
    {"TTL",                  TK_TTL},
    {"UNION",                TK_UNION},
    {"UNSIGNED",             TK_UNSIGNED},
242
    {"UPDATE",               TK_UPDATE},
X
Xiaoyu Wang 已提交
243 244 245 246 247
    {"USE",                  TK_USE},
    {"USER",                 TK_USER},
    {"USERS",                TK_USERS},
    {"USING",                TK_USING},
    {"VALUE",                TK_VALUE},
D
dapan1121 已提交
248
    {"VALUE_F",              TK_VALUE_F},
X
Xiaoyu Wang 已提交
249 250 251 252 253 254
    {"VALUES",               TK_VALUES},
    {"VARCHAR",              TK_VARCHAR},
    {"VARIABLES",            TK_VARIABLES},
    {"VERBOSE",              TK_VERBOSE},
    {"VGROUP",               TK_VGROUP},
    {"VGROUPS",              TK_VGROUPS},
255
    {"VNODES",               TK_VNODES},
X
Xiaoyu Wang 已提交
256 257
    {"WAL_FSYNC_PERIOD",     TK_WAL_FSYNC_PERIOD},
    {"WAL_LEVEL",            TK_WAL_LEVEL},
X
Xiaoyu Wang 已提交
258 259 260 261
    {"WAL_RETENTION_PERIOD", TK_WAL_RETENTION_PERIOD},
    {"WAL_RETENTION_SIZE",   TK_WAL_RETENTION_SIZE},
    {"WAL_ROLL_PERIOD",      TK_WAL_ROLL_PERIOD},
    {"WAL_SEGMENT_SIZE",     TK_WAL_SEGMENT_SIZE},
X
Xiaoyu Wang 已提交
262
    {"WATERMARK",            TK_WATERMARK},
X
Xiaoyu Wang 已提交
263
    {"WHEN",                 TK_WHEN},
X
Xiaoyu Wang 已提交
264 265 266 267 268
    {"WHERE",                TK_WHERE},
    {"WINDOW_CLOSE",         TK_WINDOW_CLOSE},
    {"WITH",                 TK_WITH},
    {"WRITE",                TK_WRITE},
    {"_C0",                  TK_ROWTS},
269
    {"_IROWTS",              TK_IROWTS},
270
    {"_ISFILLED",            TK_ISFILLED},
X
Xiaoyu Wang 已提交
271 272 273 274
    {"_QDURATION",           TK_QDURATION},
    {"_QEND",                TK_QEND},
    {"_QSTART",              TK_QSTART},
    {"_ROWTS",               TK_ROWTS},
275
    {"_TAGS",                TK_QTAGS},
X
Xiaoyu Wang 已提交
276 277 278
    {"_WDURATION",           TK_WDURATION},
    {"_WEND",                TK_WEND},
    {"_WSTART",              TK_WSTART},
279
    {"ALIVE",                TK_ALIVE},
H
hzcheng 已提交
280
};
X
Xiaoyu Wang 已提交
281
// clang-format on
H
hzcheng 已提交
282 283 284 285 286 287 288 289 290 291 292 293 294

// Per-byte lookup for the 128 ASCII codes: 1 for '0'-'9', 'A'-'Z', 'a'-'z' and '_', 0 for
// everything else. The table has exactly 128 entries, so it must only be indexed with
// bytes whose high bit is clear.
static const char isIdChar[128] = {
    /* 0x00 - 0x0F              */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 - 0x1F              */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 - 0x2F  punctuation */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x30 - 0x3F  0-9 : ; < = */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
    /* 0x40 - 0x4F  @ A-O       */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 - 0x5F  P-Z [ .. _  */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
    /* 0x60 - 0x6F  ` a-o       */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 - 0x7F  p-z { .. DEL*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
};

H
Haojun Liao 已提交
295
// Keyword lookup table: keyword text -> pointer to its SKeyword entry in keywordTable.
// Created by doInitKeywordsTable(), reset to NULL and freed by taosCleanupKeywordsTable().
static void* keywordHashTable = NULL;
H
hzcheng 已提交
296

S
TD-1057  
Shengliang Guan 已提交
297
static void doInitKeywordsTable(void) {
298
  int numOfEntries = tListLen(keywordTable);
X
Xiaoyu Wang 已提交
299

H
Haojun Liao 已提交
300
  keywordHashTable = taosHashInit(numOfEntries, MurmurHash3_32, true, false);
301
  for (int32_t i = 0; i < numOfEntries; i++) {
S
TD-1057  
Shengliang Guan 已提交
302
    keywordTable[i].len = (uint8_t)strlen(keywordTable[i].name);
303
    void* ptr = &keywordTable[i];
H
Haojun Liao 已提交
304
    taosHashPut(keywordHashTable, keywordTable[i].name, keywordTable[i].len, (void*)&ptr, POINTER_BYTES);
H
hzcheng 已提交
305
  }
306 307
}

wafwerar's avatar
wafwerar 已提交
308
// Once-control handed to taosThreadOnce() in tKeywordCode() together with doInitKeywordsTable().
static TdThreadOnce keywordsHashTableInit = PTHREAD_ONCE_INIT;
309

310
static int32_t tKeywordCode(const char* z, int n) {
wafwerar's avatar
wafwerar 已提交
311
  taosThreadOnce(&keywordsHashTableInit, doInitKeywordsTable);
X
Xiaoyu Wang 已提交
312

H
hjxilinx 已提交
313
  char key[512] = {0};
X
Xiaoyu Wang 已提交
314
  if (n > tListLen(key)) {  // too long token, can not be any other token type
315
    return TK_NK_ID;
316
  }
X
Xiaoyu Wang 已提交
317

H
hzcheng 已提交
318 319
  for (int32_t j = 0; j < n; ++j) {
    if (z[j] >= 'a' && z[j] <= 'z') {
320
      key[j] = (char)(z[j] & 0xDF);  // to uppercase and set the null-terminated
H
hzcheng 已提交
321 322 323 324 325
    } else {
      key[j] = z[j];
    }
  }

D
dapan1121 已提交
326
  if (keywordHashTable == NULL) {
327
    return TK_NK_ILLEGAL;
D
dapan1121 已提交
328
  }
H
Haojun Liao 已提交
329

H
Haojun Liao 已提交
330
  SKeyword** pKey = (SKeyword**)taosHashGet(keywordHashTable, key, n);
X
Xiaoyu Wang 已提交
331
  return (pKey != NULL) ? (*pKey)->type : TK_NK_ID;
H
hzcheng 已提交
332 333
}

H
huili 已提交
334
/*
335 336 337
 * Return the length of the token that begins at z[0].
 * Store the token type in *type before returning.
 */
338
uint32_t tGetToken(const char* z, uint32_t* tokenId) {
339
  uint32_t i;
H
hzcheng 已提交
340 341 342 343 344 345 346 347
  switch (*z) {
    case ' ':
    case '\t':
    case '\n':
    case '\f':
    case '\r': {
      for (i = 1; isspace(z[i]); i++) {
      }
348
      *tokenId = TK_NK_SPACE;
H
hzcheng 已提交
349 350 351
      return i;
    }
    case ':': {
352
      *tokenId = TK_NK_COLON;
H
hzcheng 已提交
353 354 355 356 357 358
      return 1;
    }
    case '-': {
      if (z[1] == '-') {
        for (i = 2; z[i] && z[i] != '\n'; i++) {
        }
359
        *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
360
        return i;
361 362 363
      } else if (z[1] == '>') {
        *tokenId = TK_NK_ARROW;
        return 2;
H
hzcheng 已提交
364
      }
X
Xiaoyu Wang 已提交
365
      *tokenId = TK_NK_MINUS;
H
hzcheng 已提交
366 367 368
      return 1;
    }
    case '(': {
369
      *tokenId = TK_NK_LP;
H
hzcheng 已提交
370 371 372
      return 1;
    }
    case ')': {
373
      *tokenId = TK_NK_RP;
H
hzcheng 已提交
374 375 376
      return 1;
    }
    case ';': {
377
      *tokenId = TK_NK_SEMI;
H
hzcheng 已提交
378 379 380
      return 1;
    }
    case '+': {
381
      *tokenId = TK_NK_PLUS;
H
hzcheng 已提交
382 383 384
      return 1;
    }
    case '*': {
385
      *tokenId = TK_NK_STAR;
H
hzcheng 已提交
386 387 388 389
      return 1;
    }
    case '/': {
      if (z[1] != '*' || z[2] == 0) {
390
        *tokenId = TK_NK_SLASH;
H
hzcheng 已提交
391 392 393 394 395
        return 1;
      }
      for (i = 3; z[i] && (z[i] != '/' || z[i - 1] != '*'); i++) {
      }
      if (z[i]) i++;
396
      *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
397 398 399
      return i;
    }
    case '%': {
400
      *tokenId = TK_NK_REM;
H
hzcheng 已提交
401 402 403
      return 1;
    }
    case '=': {
404
      *tokenId = TK_NK_EQ;
H
hzcheng 已提交
405 406 407 408
      return 1 + (z[1] == '=');
    }
    case '<': {
      if (z[1] == '=') {
409
        *tokenId = TK_NK_LE;
H
hzcheng 已提交
410 411
        return 2;
      } else if (z[1] == '>') {
412
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
413 414
        return 2;
      } else if (z[1] == '<') {
415
        *tokenId = TK_NK_LSHIFT;
H
hzcheng 已提交
416 417
        return 2;
      } else {
418
        *tokenId = TK_NK_LT;
H
hzcheng 已提交
419 420 421 422 423
        return 1;
      }
    }
    case '>': {
      if (z[1] == '=') {
424
        *tokenId = TK_NK_GE;
H
hzcheng 已提交
425 426
        return 2;
      } else if (z[1] == '>') {
427
        *tokenId = TK_NK_RSHIFT;
H
hzcheng 已提交
428 429
        return 2;
      } else {
430
        *tokenId = TK_NK_GT;
H
hzcheng 已提交
431 432 433 434 435
        return 1;
      }
    }
    case '!': {
      if (z[1] != '=') {
436
        *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
437 438
        return 2;
      } else {
439
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
440 441 442 443 444
        return 2;
      }
    }
    case '|': {
      if (z[1] != '|') {
445
        *tokenId = TK_NK_BITOR;
H
hzcheng 已提交
446 447
        return 1;
      } else {
448
        *tokenId = TK_NK_CONCAT;
H
hzcheng 已提交
449 450 451 452
        return 2;
      }
    }
    case ',': {
453
      *tokenId = TK_NK_COMMA;
H
hzcheng 已提交
454 455 456
      return 1;
    }
    case '&': {
457
      *tokenId = TK_NK_BITAND;
H
hzcheng 已提交
458 459 460
      return 1;
    }
    case '~': {
461
      *tokenId = TK_NK_BITNOT;
H
hzcheng 已提交
462 463
      return 1;
    }
S
slguan 已提交
464
    case '?': {
465
      *tokenId = TK_NK_QUESTION;
S
slguan 已提交
466 467
      return 1;
    }
468
    case '`':
H
hzcheng 已提交
469 470
    case '\'':
    case '"': {
S
slguan 已提交
471 472
      int  delim = z[0];
      bool strEnd = false;
H
hzcheng 已提交
473
      for (i = 1; z[i]; i++) {
474
        if (delim != '`' && z[i] == '\\') {  // ignore the escaped character that follows this backslash
L
[1292]  
lihui 已提交
475 476 477
          i++;
          continue;
        }
X
Xiaoyu Wang 已提交
478

479
        if (z[i] == delim) {
H
hzcheng 已提交
480 481 482
          if (z[i + 1] == delim) {
            i++;
          } else {
H
huili 已提交
483
            strEnd = true;
H
hzcheng 已提交
484 485 486 487
            break;
          }
        }
      }
X
Xiaoyu Wang 已提交
488

H
hzcheng 已提交
489
      if (z[i]) i++;
H
huili 已提交
490

S
slguan 已提交
491
      if (strEnd) {
X
Xiaoyu Wang 已提交
492
        *tokenId = (delim == '`') ? TK_NK_ID : TK_NK_STRING;
S
slguan 已提交
493 494
        return i;
      }
H
huili 已提交
495

S
slguan 已提交
496
      break;
H
hzcheng 已提交
497 498
    }
    case '.': {
S
slguan 已提交
499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515
      /*
       * handle the the float number with out integer part
       * .123
       * .123e4
       */
      if (isdigit(z[1])) {
        for (i = 2; isdigit(z[i]); i++) {
        }

        if ((z[i] == 'e' || z[i] == 'E') &&
            (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
          i += 2;
          while (isdigit(z[i])) {
            i++;
          }
        }

516
        *tokenId = TK_NK_FLOAT;
S
slguan 已提交
517 518
        return i;
      } else {
519
        *tokenId = TK_NK_DOT;
S
slguan 已提交
520 521 522 523 524 525 526
        return 1;
      }
    }

    case '0': {
      char next = z[1];

X
Xiaoyu Wang 已提交
527
      if (next == 'b') {  // bin number
528
        *tokenId = TK_NK_BIN;
S
slguan 已提交
529 530 531 532 533 534 535 536
        for (i = 2; (z[i] == '0' || z[i] == '1'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
X
Xiaoyu Wang 已提交
537
      } else if (next == 'x') {  // hex number
538
        *tokenId = TK_NK_HEX;
S
slguan 已提交
539 540 541 542 543 544 545 546 547
        for (i = 2; isdigit(z[i]) || (z[i] >= 'a' && z[i] <= 'f') || (z[i] >= 'A' && z[i] <= 'F'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
      }
H
hzcheng 已提交
548 549 550 551 552 553 554 555 556 557
    }
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9': {
558
      *tokenId = TK_NK_INTEGER;
H
hzcheng 已提交
559 560 561
      for (i = 1; isdigit(z[i]); i++) {
      }

H
Haojun Liao 已提交
562
      /* here is the 1u/1a/2s/3m/9y */
X
Xiaoyu Wang 已提交
563 564 565
      if ((z[i] == 'b' || z[i] == 'u' || z[i] == 'a' || z[i] == 's' || z[i] == 'm' || z[i] == 'h' || z[i] == 'd' ||
           z[i] == 'n' || z[i] == 'y' || z[i] == 'w' || z[i] == 'B' || z[i] == 'U' || z[i] == 'A' || z[i] == 'S' ||
           z[i] == 'M' || z[i] == 'H' || z[i] == 'D' || z[i] == 'N' || z[i] == 'Y' || z[i] == 'W') &&
H
hjxilinx 已提交
566
          (isIdChar[(uint8_t)z[i + 1]] == 0)) {
567
        *tokenId = TK_NK_VARIABLE;
H
hzcheng 已提交
568 569 570 571 572 573 574 575 576 577
        i += 1;
        return i;
      }

      int32_t seg = 1;
      while (z[i] == '.' && isdigit(z[i + 1])) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
578
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
579 580 581 582
        seg++;
      }

      if (seg == 4) {  // ip address
583
        *tokenId = TK_NK_IPTOKEN;
H
hzcheng 已提交
584
        return i;
X
Xiaoyu Wang 已提交
585 586
      } else if (seg > 2) {
        break;
H
hzcheng 已提交
587 588 589 590 591 592 593 594
      }

      if ((z[i] == 'e' || z[i] == 'E') &&
          (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
595
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
596 597 598
      }
      return i;
    }
X
Xiaoyu Wang 已提交
599 600 601 602 603 604
    // case '[': {
    //   for (i = 1; z[i] && z[i - 1] != ']'; i++) {
    //   }
    //   *tokenId = TK_NK_ID;
    //   return i;
    // }
H
hzcheng 已提交
605 606 607 608
    case 'T':
    case 't':
    case 'F':
    case 'f': {
X
Xiaoyu Wang 已提交
609
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t)z[i]]; i++) {
H
hzcheng 已提交
610 611 612
      }

      if ((i == 4 && strncasecmp(z, "true", 4) == 0) || (i == 5 && strncasecmp(z, "false", 5) == 0)) {
613
        *tokenId = TK_NK_BOOL;
H
hzcheng 已提交
614 615
        return i;
      }
X
Xiaoyu Wang 已提交
616 617
      *tokenId = tKeywordCode(z, i);
      return i;
H
hzcheng 已提交
618 619
    }
    default: {
X
Xiaoyu Wang 已提交
620
      if (((*z & 0x80) != 0) || !isIdChar[(uint8_t)*z]) {
H
hzcheng 已提交
621 622
        break;
      }
X
Xiaoyu Wang 已提交
623
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t)z[i]]; i++) {
H
hzcheng 已提交
624
      }
625
      *tokenId = tKeywordCode(z, i);
H
hzcheng 已提交
626 627 628 629
      return i;
    }
  }

630
  *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
631 632 633
  return 0;
}

X
Xiaoyu Wang 已提交
634
SToken tStrGetToken(const char* str, int32_t* i, bool isPrevOptr, bool* pIgnoreComma) {
H
Haojun Liao 已提交
635
  SToken t0 = {0};
S
slguan 已提交
636

H
hzcheng 已提交
637 638
  // here we reach the end of sql string, null-terminated string
  if (str[*i] == 0) {
S
slguan 已提交
639 640
    t0.n = 0;
    return t0;
H
hzcheng 已提交
641 642
  }

643
  // IGNORE TK_NK_SPACE, TK_NK_COMMA, and specified tokens
S
slguan 已提交
644 645 646
  while (1) {
    *i += t0.n;

H
Haojun Liao 已提交
647
    int32_t numOfComma = 0;
X
Xiaoyu Wang 已提交
648
    char    t = str[*i];
H
Haojun Liao 已提交
649 650 651 652
    while (t == ' ' || t == '\n' || t == '\r' || t == '\t' || t == '\f' || t == ',') {
      if (t == ',' && (++numOfComma > 1)) {  // comma only allowed once
        t0.n = 0;
        return t0;
S
slguan 已提交
653
      }
X
Xiaoyu Wang 已提交
654

X
Xiaoyu Wang 已提交
655 656 657 658
      if (NULL != pIgnoreComma && t == ',') {
        *pIgnoreComma = true;
      }

H
Haojun Liao 已提交
659
      t = str[++(*i)];
S
slguan 已提交
660
    }
H
hzcheng 已提交
661

662
    t0.n = tGetToken(&str[*i], &t0.type);
H
Haojun Liao 已提交
663
    break;
S
slguan 已提交
664

H
Haojun Liao 已提交
665 666
    // not support user specfied ignored symbol list
#if 0
H
Haojun Liao 已提交
667
    bool ignore = false;
S
slguan 已提交
668 669
    for (uint32_t k = 0; k < numOfIgnoreToken; k++) {
      if (t0.type == ignoreTokenTypes[k]) {
H
Haojun Liao 已提交
670
        ignore = true;
S
slguan 已提交
671 672 673 674
        break;
      }
    }

H
Haojun Liao 已提交
675
    if (!ignore) {
S
slguan 已提交
676 677
      break;
    }
H
Haojun Liao 已提交
678
#endif
H
hzcheng 已提交
679 680
  }

681
  if (t0.type == TK_NK_SEMI) {
S
slguan 已提交
682
    t0.n = 0;
D
dapan1121 已提交
683
    t0.type = 0;
S
slguan 已提交
684 685 686 687 688 689 690 691
    return t0;
  }

  uint32_t type = 0;
  int32_t  len;

  // support parse the 'db.tbl' format, notes: There should be no space on either side of the dot!
  if ('.' == str[*i + t0.n]) {
692
    len = tGetToken(&str[*i + t0.n + 1], &type);
S
slguan 已提交
693 694

    // only id and string are valid
695
    if ((TK_NK_STRING != t0.type) && (TK_NK_ID != t0.type)) {
696
      t0.type = TK_NK_ILLEGAL;
S
slguan 已提交
697 698 699 700 701 702 703 704 705
      t0.n = 0;

      return t0;
    }

    t0.n += len + 1;

  } else {
    // support parse the -/+number format
X
Xiaoyu Wang 已提交
706
    if ((isPrevOptr) && (t0.type == TK_NK_MINUS || t0.type == TK_NK_PLUS)) {
707
      len = tGetToken(&str[*i + t0.n], &type);
708
      if (type == TK_NK_INTEGER || type == TK_NK_FLOAT) {
S
slguan 已提交
709 710 711
        t0.type = type;
        t0.n += len;
      }
H
hzcheng 已提交
712 713 714
    }
  }

X
Xiaoyu Wang 已提交
715
  t0.z = (char*)str + (*i);
S
slguan 已提交
716 717 718
  *i += t0.n;

  return t0;
H
hzcheng 已提交
719 720
}

X
Xiaoyu Wang 已提交
721
// Returns true when tKeywordCode() resolves the len bytes at z to any code other than TK_NK_ID.
bool taosIsKeyWordToken(const char* z, int32_t len) {
  int32_t code = tKeywordCode(z, len);
  return code != TK_NK_ID;
}
H
Haojun Liao 已提交
722 723

/*
 * Release the keyword hash table.
 *
 * The global pointer is swapped to NULL with a compare-and-exchange, and the
 * table is freed only by the caller whose exchange returned the pointer it
 * had read. Nothing happens when the table is already NULL.
 */
void taosCleanupKeywordsTable(void) {
  void* m = keywordHashTable;
  if (m == NULL) {
    return;
  }

  if (atomic_val_compare_exchange_ptr(&keywordHashTable, m, NULL) == m) {
    taosHashCleanup(m);
  }
}