parTokenizer.c 19.8 KB
Newer Older
H
hzcheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

H
hjxilinx 已提交
16
#include "os.h"
X
Xiaoyu Wang 已提交
17
#include "parToken.h"
H
Haojun Liao 已提交
18
#include "thash.h"
H
hjxilinx 已提交
19
#include "taosdef.h"
20
#include "ttokendef.h"
H
hzcheng 已提交
21

S
slguan 已提交
22
/*
 * One entry of the SQL keyword table. Entries are indexed by keyword text in
 * a hash table so that an identifier can be classified with a single lookup.
 */
typedef struct SKeyword {
  const char* name;  // keyword text as written in the table (upper case)
  uint16_t    type;  // token type code (a TK_* constant) reported for this keyword
  uint8_t     len;   // byte length of name; left as 0 by the initializers and filled in at init time
} SKeyword;

S
slguan 已提交
29
// keywords in sql string
H
hzcheng 已提交
30
static SKeyword keywordTable[] = {
31
    {"ACCOUNT",       TK_ACCOUNT},
32 33 34
    {"ACCOUNTS",      TK_ACCOUNTS},
    {"ADD",           TK_ADD},
    {"AGGREGATE",     TK_AGGREGATE},
35
    {"ALL",           TK_ALL},
36
    {"ALTER",         TK_ALTER},
37
    {"ANALYZE",       TK_ANALYZE},
38
    {"AND",           TK_AND},
39
    {"APPS",          TK_APPS},
40 41
    {"AS",            TK_AS},
    {"ASC",           TK_ASC},
42
    {"AT_ONCE",       TK_AT_ONCE},
43 44 45 46
    {"BETWEEN",       TK_BETWEEN},
    {"BINARY",        TK_BINARY},
    {"BIGINT",        TK_BIGINT},
    {"BLOCKS",        TK_BLOCKS},
47 48
    {"BNODE",         TK_BNODE},
    {"BNODES",        TK_BNODES},
49
    {"BOOL",          TK_BOOL},
50
    {"BUFSIZE",       TK_BUFSIZE},
51 52 53
    {"BY",            TK_BY},
    {"CACHE",         TK_CACHE},
    {"CACHELAST",     TK_CACHELAST},
54
    {"CAST",          TK_CAST},
55
    {"COLUMN",        TK_COLUMN},
56 57
    {"COMMENT",       TK_COMMENT},
    {"COMP",          TK_COMP},
58 59 60 61
    {"COMPACT",       TK_COMPACT},
    {"CONNS",         TK_CONNS},
    {"CONNECTION",    TK_CONNECTION},
    {"CONNECTIONS",   TK_CONNECTIONS},
62
    {"COUNT",         TK_COUNT},
63 64 65 66
    {"CREATE",        TK_CREATE},
    {"DATABASE",      TK_DATABASE},
    {"DATABASES",     TK_DATABASES},
    {"DAYS",          TK_DAYS},
67
    {"DBS",           TK_DBS},
X
Xiaoyu Wang 已提交
68
    {"DELAY",         TK_DELAY},
69
    {"DESC",          TK_DESC},
70
    {"DESCRIBE",      TK_DESCRIBE},
71
    {"DISTINCT",      TK_DISTINCT},
72 73
    {"DNODE",         TK_DNODE},
    {"DNODES",        TK_DNODES},
74
    {"DOUBLE",        TK_DOUBLE},
75
    {"DROP",          TK_DROP},
76
    {"EXISTS",        TK_EXISTS},
77
    {"EXPLAIN",       TK_EXPLAIN},
X
Xiaoyu Wang 已提交
78
    {"FILE_FACTOR",   TK_FILE_FACTOR},
79
    {"FILL",          TK_FILL},
X
Xiaoyu Wang 已提交
80
    {"FIRST",         TK_FIRST},
81 82 83
    {"FLOAT",         TK_FLOAT},
    {"FROM",          TK_FROM},
    {"FSYNC",         TK_FSYNC},
X
Xiaoyu Wang 已提交
84
    {"FUNCTION",      TK_FUNCTION},
X
Xiaoyu Wang 已提交
85
    {"FUNCTIONS",     TK_FUNCTIONS},
86
    {"GRANTS",        TK_GRANTS},
87 88 89 90 91
    {"GROUP",         TK_GROUP},
    {"HAVING",        TK_HAVING},
    {"IF",            TK_IF},
    {"IMPORT",        TK_IMPORT},
    {"IN",            TK_IN},
X
Xiaoyu Wang 已提交
92
    {"INDEX",         TK_INDEX},
X
Xiaoyu Wang 已提交
93
    {"INDEXES",       TK_INDEXES},
94 95 96 97 98 99 100 101 102 103
    {"INNER",         TK_INNER},
    {"INT",           TK_INT},
    {"INSERT",        TK_INSERT},
    {"INTEGER",       TK_INTEGER},
    {"INTERVAL",      TK_INTERVAL},
    {"INTO",          TK_INTO},
    {"IS",            TK_IS},
    {"JOIN",          TK_JOIN},
    {"JSON",          TK_JSON},
    {"KEEP",          TK_KEEP},
104
    {"KILL",          TK_KILL},
X
Xiaoyu Wang 已提交
105
    {"LAST",          TK_LAST},
106
    {"LAST_ROW",      TK_LAST_ROW},
107
    {"LICENCE",       TK_LICENCE},
108 109 110
    {"LIKE",          TK_LIKE},
    {"LIMIT",         TK_LIMIT},
    {"LINEAR",        TK_LINEAR},
111
    {"LOCAL",         TK_LOCAL},
112 113 114 115
    {"MATCH",         TK_MATCH},
    {"MAXROWS",       TK_MAXROWS},
    {"MINROWS",       TK_MINROWS},
    {"MINUS",         TK_MINUS},
116
    {"MNODE",         TK_MNODE},
X
Xiaoyu Wang 已提交
117
    {"MNODES",        TK_MNODES},
118
    {"MODIFY",        TK_MODIFY},
X
Xiaoyu Wang 已提交
119
    {"MODULES",       TK_MODULES},
120 121 122 123 124 125
    {"NCHAR",         TK_NCHAR},
    {"NMATCH",        TK_NMATCH},
    {"NONE",          TK_NONE},
    {"NOT",           TK_NOT},
    {"NOW",           TK_NOW},
    {"NULL",          TK_NULL},
X
Xiaoyu Wang 已提交
126
    {"NULLS",         TK_NULLS},
127 128 129 130
    {"OFFSET",        TK_OFFSET},
    {"ON",            TK_ON},
    {"OR",            TK_OR},
    {"ORDER",         TK_ORDER},
131
    {"OUTPUTTYPE",    TK_OUTPUTTYPE},
132
    {"PARTITION",     TK_PARTITION},
133 134
    {"PASS",          TK_PASS},
    {"PORT",          TK_PORT},
135
    {"PPS",           TK_PPS},
136 137 138
    {"PRECISION",     TK_PRECISION},
    {"PRIVILEGE",     TK_PRIVILEGE},
    {"PREV",          TK_PREV},
X
Xiaoyu Wang 已提交
139
    {"QNODE",         TK_QNODE},
X
Xiaoyu Wang 已提交
140
    {"QNODES",        TK_QNODES},
141 142
    {"QTIME",         TK_QTIME},
    {"QUERIES",       TK_QUERIES},
143
    {"QUERY",         TK_QUERY},
144
    {"QUORUM",        TK_QUORUM},
145
    {"RATIO",         TK_RATIO},
146
    {"REPLICA",       TK_REPLICA},
147
    {"RESET",         TK_RESET},
X
Xiaoyu Wang 已提交
148 149
    {"RETENTIONS",    TK_RETENTIONS},
    {"ROLLUP",        TK_ROLLUP},
150
    {"SCORES",        TK_SCORES},
151 152
    {"SELECT",        TK_SELECT},
    {"SESSION",       TK_SESSION},
153
    {"SET",           TK_SET},
154 155 156 157 158 159
    {"SHOW",          TK_SHOW},
    {"SINGLE_STABLE", TK_SINGLE_STABLE},
    {"SLIDING",       TK_SLIDING},
    {"SLIMIT",        TK_SLIMIT},
    {"SMA",           TK_SMA},
    {"SMALLINT",      TK_SMALLINT},
160 161
    {"SNODE",         TK_SNODE},
    {"SNODES",        TK_SNODES},
162
    {"SOFFSET",       TK_SOFFSET},
163
    {"STABLE",        TK_STABLE},
164
    {"STABLES",       TK_STABLES},
165
    {"STATE",         TK_STATE},
166
    {"STATE_WINDOW",  TK_STATE_WINDOW},
167
    {"STORAGE",       TK_STORAGE},
168
    {"STREAM",        TK_STREAM},
X
Xiaoyu Wang 已提交
169
    {"STREAMS",       TK_STREAMS},
170
    {"STREAM_MODE",   TK_STREAM_MODE},
171
    {"SYNCDB",        TK_SYNCDB},
172 173
    {"TABLE",         TK_TABLE},
    {"TABLES",        TK_TABLES},
174
    {"TAG",           TK_TAG},
175
    {"TAGS",          TK_TAGS},
176
    {"TBNAME",        TK_TBNAME},
177
    {"TIMESTAMP",     TK_TIMESTAMP},
178
    {"TIMEZONE",      TK_TIMEZONE},
179
    {"TINYINT",       TK_TINYINT},
180
    {"TODAY",         TK_TODAY},
181
    {"TOPIC",         TK_TOPIC},
182
    {"TOPICS",        TK_TOPICS},
183
    {"TRIGGER",       TK_TRIGGER},
184
    {"TSERIES",       TK_TSERIES},
185 186 187 188
    {"TTL",           TK_TTL},
    {"UNION",         TK_UNION},
    {"UNSIGNED",      TK_UNSIGNED},
    {"USE",           TK_USE},
189 190
    {"USER",          TK_USER},
    {"USERS",         TK_USERS},
191
    {"USING",         TK_USING},
X
Xiaoyu Wang 已提交
192
    {"VALUE",         TK_VALUE},
193 194
    {"VALUES",        TK_VALUES},
    {"VARCHAR",       TK_VARCHAR},
195
    {"VARIABLES",     TK_VARIABLES},
196
    {"VERBOSE",       TK_VERBOSE},
197
    {"VGROUPS",       TK_VGROUPS},
198
    {"VNODES",        TK_VNODES},
199
    {"WAL",           TK_WAL},
200 201 202 203 204 205
    {"WATERMARK",     TK_WATERMARK},
    {"WHERE",         TK_WHERE},
    {"WINDOW_CLOSE",  TK_WINDOW_CLOSE},
    {"_QENDTS",       TK_QENDTS},
    {"_QSTARTTS",     TK_QSTARTTS},
    {"_ROWTS",        TK_ROWTS},
206 207
    {"_WDURATION",    TK_WDURATION},
    {"_WENDTS",       TK_WENDTS},
X
Xiaoyu Wang 已提交
208
    {"_WSTARTTS",     TK_WSTARTTS},
209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272
    // {"ID",           TK_ID},
    // {"STRING",       TK_STRING},
    // {"EQ",           TK_EQ},
    // {"NE",           TK_NE},
    // {"ISNULL",       TK_ISNULL},
    // {"NOTNULL",      TK_NOTNULL},
    // {"GLOB",         TK_GLOB},
    // {"GT",           TK_GT},
    // {"GE",           TK_GE},
    // {"LT",           TK_LT},
    // {"LE",           TK_LE},
    // {"BITAND",       TK_BITAND},
    // {"BITOR",        TK_BITOR},
    // {"LSHIFT",       TK_LSHIFT},
    // {"RSHIFT",       TK_RSHIFT},
    // {"PLUS",         TK_PLUS},
    // {"DIVIDE",       TK_DIVIDE},
    // {"TIMES",        TK_TIMES},
    // {"STAR",         TK_STAR},
    // {"SLASH",        TK_SLASH},
    // {"REM ",         TK_REM},
    // {"||",           TK_CONCAT},
    // {"UMINUS",       TK_UMINUS},
    // {"UPLUS",        TK_UPLUS},
    // {"BITNOT",       TK_BITNOT},
    // {"DOT",          TK_DOT},
    // {"CTIME",        TK_CTIME},
    // {"LP",           TK_LP},
    // {"RP",           TK_RP},
    // {"COMMA",        TK_COMMA},
    // {"EVERY",        TK_EVERY},
    // {"VARIABLE",     TK_VARIABLE},
    // {"UPDATE",       TK_UPDATE},
    // {"CHANGE",       TK_CHANGE},
    // {"COLON",        TK_COLON},
    // {"ABORT",        TK_ABORT},
    // {"AFTER",        TK_AFTER},
    // {"ATTACH",       TK_ATTACH},
    // {"BEFORE",       TK_BEFORE},
    // {"BEGIN",        TK_BEGIN},
    // {"CASCADE",      TK_CASCADE},
    // {"CLUSTER",      TK_CLUSTER},
    // {"CONFLICT",     TK_CONFLICT},
    // {"COPY",         TK_COPY},
    // {"DEFERRED",     TK_DEFERRED},
    // {"DELIMITERS",   TK_DELIMITERS},
    // {"DETACH",       TK_DETACH},
    // {"EACH",         TK_EACH},
    // {"END",          TK_END},
    // {"FAIL",         TK_FAIL},
    // {"FOR",          TK_FOR},
    // {"IGNORE",       TK_IGNORE},
    // {"IMMEDIATE",    TK_IMMEDIATE},
    // {"INITIALLY",    TK_INITIALLY},
    // {"INSTEAD",      TK_INSTEAD},
    // {"KEY",          TK_KEY},
    // {"OF",           TK_OF},
    // {"RAISE",        TK_RAISE},
    // {"REPLACE",      TK_REPLACE},
    // {"RESTRICT",     TK_RESTRICT},
    // {"ROW",          TK_ROW},
    // {"STATEMENT",    TK_STATEMENT},
    // {"VIEW",         TK_VIEW},
    // {"SEMI",         TK_SEMI},
273
    // {"PARTITIONS",   TK_PARTITIONS},
274
    // {"MODE",         TK_MODE},
H
hzcheng 已提交
275 276 277 278 279 280 281 282 283 284 285 286 287 288
};

/*
 * Lookup table for the 128 ASCII code points: an entry is 1 when the character
 * may appear in an identifier/keyword and 0 otherwise. The marked characters
 * are the digits '0'-'9', the letters 'A'-'Z' and 'a'-'z', and '_'.
 *
 * The table has exactly 128 entries, so it must only be indexed with bytes
 * whose high bit (0x80) is clear.
 */
static const char isIdChar[] = {
    /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
};

H
Haojun Liao 已提交
289
// Hash table keyed by keyword text; each value is a pointer to the matching
// keywordTable entry. Created by doInitKeywordsTable() and released (and reset
// to NULL) by taosCleanupKeywordsTable().
static void* keywordHashTable = NULL;
H
hzcheng 已提交
290

S
TD-1057  
Shengliang Guan 已提交
291
static void doInitKeywordsTable(void) {
292 293
  int numOfEntries = tListLen(keywordTable);
  
H
Haojun Liao 已提交
294
  keywordHashTable = taosHashInit(numOfEntries, MurmurHash3_32, true, false);
295
  for (int32_t i = 0; i < numOfEntries; i++) {
S
TD-1057  
Shengliang Guan 已提交
296
    keywordTable[i].len = (uint8_t)strlen(keywordTable[i].name);
297
    void* ptr = &keywordTable[i];
H
Haojun Liao 已提交
298
    taosHashPut(keywordHashTable, keywordTable[i].name, keywordTable[i].len, (void*)&ptr, POINTER_BYTES);
H
hzcheng 已提交
299
  }
300 301
}

wafwerar's avatar
wafwerar 已提交
302
// Once-control object handed to taosThreadOnce() in tKeywordCode() together
// with doInitKeywordsTable(), so the keyword hash table is built lazily on the
// first lookup (presumably at most once per process - confirm against
// taosThreadOnce()).
static TdThreadOnce keywordsHashTableInit = PTHREAD_ONCE_INIT;
303

304
static int32_t tKeywordCode(const char* z, int n) {
wafwerar's avatar
wafwerar 已提交
305
  taosThreadOnce(&keywordsHashTableInit, doInitKeywordsTable);
H
hjxilinx 已提交
306 307
  
  char key[512] = {0};
308
  if (n > tListLen(key)) { // too long token, can not be any other token type
309
    return TK_NK_ID;
310
  }
H
hjxilinx 已提交
311
  
H
hzcheng 已提交
312 313
  for (int32_t j = 0; j < n; ++j) {
    if (z[j] >= 'a' && z[j] <= 'z') {
314
      key[j] = (char)(z[j] & 0xDF);  // to uppercase and set the null-terminated
H
hzcheng 已提交
315 316 317 318 319
    } else {
      key[j] = z[j];
    }
  }

D
dapan1121 已提交
320
  if (keywordHashTable == NULL) {
321
    return TK_NK_ILLEGAL;
D
dapan1121 已提交
322
  }
H
Haojun Liao 已提交
323

H
Haojun Liao 已提交
324
  SKeyword** pKey = (SKeyword**)taosHashGet(keywordHashTable, key, n);
325
  return (pKey != NULL)? (*pKey)->type:TK_NK_ID;
H
hzcheng 已提交
326 327
}

H
huili 已提交
328
/*
329 330 331
 * Return the length of the token that begins at z[0].
 * Store the token type in *type before returning.
 */
332
uint32_t tGetToken(const char* z, uint32_t* tokenId) {
333
  uint32_t i;
H
hzcheng 已提交
334 335 336 337 338 339 340 341
  switch (*z) {
    case ' ':
    case '\t':
    case '\n':
    case '\f':
    case '\r': {
      for (i = 1; isspace(z[i]); i++) {
      }
342
      *tokenId = TK_NK_SPACE;
H
hzcheng 已提交
343 344 345
      return i;
    }
    case ':': {
346
      *tokenId = TK_NK_COLON;
H
hzcheng 已提交
347 348 349 350 351 352
      return 1;
    }
    case '-': {
      if (z[1] == '-') {
        for (i = 2; z[i] && z[i] != '\n'; i++) {
        }
353
        *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
354
        return i;
355 356 357
      } else if (z[1] == '>') {
        *tokenId = TK_NK_ARROW;
        return 2;
H
hzcheng 已提交
358
      }
X
Xiaoyu Wang 已提交
359
      *tokenId = TK_NK_MINUS;
H
hzcheng 已提交
360 361 362
      return 1;
    }
    case '(': {
363
      *tokenId = TK_NK_LP;
H
hzcheng 已提交
364 365 366
      return 1;
    }
    case ')': {
367
      *tokenId = TK_NK_RP;
H
hzcheng 已提交
368 369 370
      return 1;
    }
    case ';': {
371
      *tokenId = TK_NK_SEMI;
H
hzcheng 已提交
372 373 374
      return 1;
    }
    case '+': {
375
      *tokenId = TK_NK_PLUS;
H
hzcheng 已提交
376 377 378
      return 1;
    }
    case '*': {
379
      *tokenId = TK_NK_STAR;
H
hzcheng 已提交
380 381 382 383
      return 1;
    }
    case '/': {
      if (z[1] != '*' || z[2] == 0) {
384
        *tokenId = TK_NK_SLASH;
H
hzcheng 已提交
385 386 387 388 389
        return 1;
      }
      for (i = 3; z[i] && (z[i] != '/' || z[i - 1] != '*'); i++) {
      }
      if (z[i]) i++;
390
      *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
391 392 393
      return i;
    }
    case '%': {
394
      *tokenId = TK_NK_REM;
H
hzcheng 已提交
395 396 397
      return 1;
    }
    case '=': {
398
      *tokenId = TK_NK_EQ;
H
hzcheng 已提交
399 400 401 402
      return 1 + (z[1] == '=');
    }
    case '<': {
      if (z[1] == '=') {
403
        *tokenId = TK_NK_LE;
H
hzcheng 已提交
404 405
        return 2;
      } else if (z[1] == '>') {
406
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
407 408
        return 2;
      } else if (z[1] == '<') {
409
        *tokenId = TK_NK_LSHIFT;
H
hzcheng 已提交
410 411
        return 2;
      } else {
412
        *tokenId = TK_NK_LT;
H
hzcheng 已提交
413 414 415 416 417
        return 1;
      }
    }
    case '>': {
      if (z[1] == '=') {
418
        *tokenId = TK_NK_GE;
H
hzcheng 已提交
419 420
        return 2;
      } else if (z[1] == '>') {
421
        *tokenId = TK_NK_RSHIFT;
H
hzcheng 已提交
422 423
        return 2;
      } else {
424
        *tokenId = TK_NK_GT;
H
hzcheng 已提交
425 426 427 428 429
        return 1;
      }
    }
    case '!': {
      if (z[1] != '=') {
430
        *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
431 432
        return 2;
      } else {
433
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
434 435 436 437 438
        return 2;
      }
    }
    case '|': {
      if (z[1] != '|') {
439
        *tokenId = TK_NK_BITOR;
H
hzcheng 已提交
440 441
        return 1;
      } else {
442
        *tokenId = TK_NK_CONCAT;
H
hzcheng 已提交
443 444 445 446
        return 2;
      }
    }
    case ',': {
447
      *tokenId = TK_NK_COMMA;
H
hzcheng 已提交
448 449 450
      return 1;
    }
    case '&': {
451
      *tokenId = TK_NK_BITAND;
H
hzcheng 已提交
452 453 454
      return 1;
    }
    case '~': {
455
      *tokenId = TK_NK_BITNOT;
H
hzcheng 已提交
456 457
      return 1;
    }
S
slguan 已提交
458
    case '?': {
459
      *tokenId = TK_NK_QUESTION;
S
slguan 已提交
460 461
      return 1;
    }
462
    case '`':
H
hzcheng 已提交
463 464
    case '\'':
    case '"': {
S
slguan 已提交
465 466
      int  delim = z[0];
      bool strEnd = false;
H
hzcheng 已提交
467
      for (i = 1; z[i]; i++) {
H
Haojun Liao 已提交
468
        if (z[i] == '\\') {   // ignore the escaped character that follows this backslash
L
[1292]  
lihui 已提交
469 470 471 472
          i++;
          continue;
        }
        
473
        if (z[i] == delim) {
H
hzcheng 已提交
474 475 476
          if (z[i + 1] == delim) {
            i++;
          } else {
H
huili 已提交
477
            strEnd = true;
H
hzcheng 已提交
478 479 480 481
            break;
          }
        }
      }
L
[1292]  
lihui 已提交
482
      
H
hzcheng 已提交
483
      if (z[i]) i++;
H
huili 已提交
484

S
slguan 已提交
485
      if (strEnd) {
486
        *tokenId = (delim == '`')? TK_NK_ID:TK_NK_STRING;
S
slguan 已提交
487 488
        return i;
      }
H
huili 已提交
489

S
slguan 已提交
490
      break;
H
hzcheng 已提交
491 492
    }
    case '.': {
S
slguan 已提交
493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509
      /*
       * handle the the float number with out integer part
       * .123
       * .123e4
       */
      if (isdigit(z[1])) {
        for (i = 2; isdigit(z[i]); i++) {
        }

        if ((z[i] == 'e' || z[i] == 'E') &&
            (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
          i += 2;
          while (isdigit(z[i])) {
            i++;
          }
        }

510
        *tokenId = TK_NK_FLOAT;
S
slguan 已提交
511 512
        return i;
      } else {
513
        *tokenId = TK_NK_DOT;
S
slguan 已提交
514 515 516 517 518 519 520 521
        return 1;
      }
    }

    case '0': {
      char next = z[1];

      if (next == 'b') { // bin number
522
        *tokenId = TK_NK_BIN;
S
slguan 已提交
523 524 525 526 527 528 529 530 531
        for (i = 2; (z[i] == '0' || z[i] == '1'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
      } else if (next == 'x') {  //hex number
532
        *tokenId = TK_NK_HEX;
S
slguan 已提交
533 534 535 536 537 538 539 540 541
        for (i = 2; isdigit(z[i]) || (z[i] >= 'a' && z[i] <= 'f') || (z[i] >= 'A' && z[i] <= 'F'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
      }
H
hzcheng 已提交
542 543 544 545 546 547 548 549 550 551
    }
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9': {
552
      *tokenId = TK_NK_INTEGER;
H
hzcheng 已提交
553 554 555
      for (i = 1; isdigit(z[i]); i++) {
      }

H
Haojun Liao 已提交
556
      /* here is the 1u/1a/2s/3m/9y */
557
      if ((z[i] == 'b' || z[i] == 'u' || z[i] == 'a' || z[i] == 's' || z[i] == 'm' || z[i] == 'h' || z[i] == 'd' || z[i] == 'n' ||
H
Haojun Liao 已提交
558
           z[i] == 'y' || z[i] == 'w' ||
559
           z[i] == 'B' || z[i] == 'U' || z[i] == 'A' || z[i] == 'S' || z[i] == 'M' || z[i] == 'H' || z[i] == 'D' || z[i] == 'N' ||
H
Haojun Liao 已提交
560
           z[i] == 'Y' || z[i] == 'W') &&
H
hjxilinx 已提交
561
          (isIdChar[(uint8_t)z[i + 1]] == 0)) {
562
        *tokenId = TK_NK_VARIABLE;
H
hzcheng 已提交
563 564 565 566 567 568 569 570 571 572
        i += 1;
        return i;
      }

      int32_t seg = 1;
      while (z[i] == '.' && isdigit(z[i + 1])) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
573
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
574 575 576 577
        seg++;
      }

      if (seg == 4) {  // ip address
578
        *tokenId = TK_NK_IPTOKEN;
H
hzcheng 已提交
579 580 581 582 583 584 585 586 587
        return i;
      }

      if ((z[i] == 'e' || z[i] == 'E') &&
          (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
588
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
589 590 591 592 593 594
      }
      return i;
    }
    case '[': {
      for (i = 1; z[i] && z[i - 1] != ']'; i++) {
      }
595
      *tokenId = TK_NK_ID;
H
hzcheng 已提交
596 597 598 599 600 601
      return i;
    }
    case 'T':
    case 't':
    case 'F':
    case 'f': {
H
hjxilinx 已提交
602
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t) z[i]]; i++) {
H
hzcheng 已提交
603 604 605
      }

      if ((i == 4 && strncasecmp(z, "true", 4) == 0) || (i == 5 && strncasecmp(z, "false", 5) == 0)) {
606
        *tokenId = TK_NK_BOOL;
H
hzcheng 已提交
607 608 609 610
        return i;
      }
    }
    default: {
H
hjxilinx 已提交
611
      if (((*z & 0x80) != 0) || !isIdChar[(uint8_t) *z]) {
H
hzcheng 已提交
612 613
        break;
      }
H
hjxilinx 已提交
614
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t) z[i]]; i++) {
H
hzcheng 已提交
615
      }
616
      *tokenId = tKeywordCode(z, i);
H
hzcheng 已提交
617 618 619 620
      return i;
    }
  }

621
  *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
622 623 624
  return 0;
}

H
Haojun Liao 已提交
625
/*
 * Replace the text of `token` inside the heap string *str with newToken.
 *
 * token->z must point into *str. A new buffer holding
 * <text before the token><newToken><text after the token> is allocated, *str is
 * updated to point at it and the old buffer is released.
 *
 * Returns a token describing newToken inside the new buffer: z points at the
 * replacement text, n is its length and type is carried over from `token`.
 * If the allocation fails, *str is left untouched (and still owned by the
 * caller) and a zeroed token (z == NULL, n == 0) is returned.
 */
SToken tscReplaceStrToken(char **str, SToken *token, const char* newToken) {
  char       *src = *str;
  const char *tail = token->z + token->n;  // text following the replaced token

  size_t headLen = (size_t)(token->z - src);
  size_t newLen = strlen(newToken);
  size_t tailLen = strlen(src) - headLen - token->n;

  char *dst = taosMemoryCalloc(1, headLen + newLen + tailLen + 1);
  if (dst == NULL) {
    SToken failed = {0};
    return failed;
  }

  // The buffer is zero-filled, so the result is already NUL-terminated.
  memcpy(dst, src, headLen);
  memcpy(dst + headLen, newToken, newLen);
  memcpy(dst + headLen + newLen, tail, tailLen);

  SToken ntoken = *token;  // keep the token type; only position and length change
  ntoken.n = (uint32_t)newLen;
  ntoken.z = dst + headLen;

  *str = dst;
  taosMemoryFreeClear(src);

  return ntoken;
}

646
SToken tStrGetToken(const char* str, int32_t* i, bool isPrevOptr) {
H
Haojun Liao 已提交
647
  SToken t0 = {0};
S
slguan 已提交
648

H
hzcheng 已提交
649 650
  // here we reach the end of sql string, null-terminated string
  if (str[*i] == 0) {
S
slguan 已提交
651 652
    t0.n = 0;
    return t0;
H
hzcheng 已提交
653 654
  }

655
  // IGNORE TK_NK_SPACE, TK_NK_COMMA, and specified tokens
S
slguan 已提交
656 657 658
  while (1) {
    *i += t0.n;

H
Haojun Liao 已提交
659 660 661 662 663 664
    int32_t numOfComma = 0;
    char t = str[*i];
    while (t == ' ' || t == '\n' || t == '\r' || t == '\t' || t == '\f' || t == ',') {
      if (t == ',' && (++numOfComma > 1)) {  // comma only allowed once
        t0.n = 0;
        return t0;
S
slguan 已提交
665
      }
H
Haojun Liao 已提交
666
    
H
Haojun Liao 已提交
667
      t = str[++(*i)];
S
slguan 已提交
668
    }
H
hzcheng 已提交
669

670
    t0.n = tGetToken(&str[*i], &t0.type);
H
Haojun Liao 已提交
671
    break;
S
slguan 已提交
672

H
Haojun Liao 已提交
673 674
    // not support user specfied ignored symbol list
#if 0
H
Haojun Liao 已提交
675
    bool ignore = false;
S
slguan 已提交
676 677
    for (uint32_t k = 0; k < numOfIgnoreToken; k++) {
      if (t0.type == ignoreTokenTypes[k]) {
H
Haojun Liao 已提交
678
        ignore = true;
S
slguan 已提交
679 680 681 682
        break;
      }
    }

H
Haojun Liao 已提交
683
    if (!ignore) {
S
slguan 已提交
684 685
      break;
    }
H
Haojun Liao 已提交
686
#endif
H
hzcheng 已提交
687 688
  }

689
  if (t0.type == TK_NK_SEMI) {
S
slguan 已提交
690 691 692 693 694 695 696 697 698
    t0.n = 0;
    return t0;
  }

  uint32_t type = 0;
  int32_t  len;

  // support parse the 'db.tbl' format, notes: There should be no space on either side of the dot!
  if ('.' == str[*i + t0.n]) {
699
    len = tGetToken(&str[*i + t0.n + 1], &type);
S
slguan 已提交
700 701

    // only id and string are valid
702
    if ((TK_NK_STRING != t0.type) && (TK_NK_ID != t0.type)) {
703
      t0.type = TK_NK_ILLEGAL;
S
slguan 已提交
704 705 706 707 708 709 710 711 712
      t0.n = 0;

      return t0;
    }

    t0.n += len + 1;

  } else {
    // support parse the -/+number format
X
Xiaoyu Wang 已提交
713
    if ((isPrevOptr) && (t0.type == TK_NK_MINUS || t0.type == TK_NK_PLUS)) {
714
      len = tGetToken(&str[*i + t0.n], &type);
715
      if (type == TK_NK_INTEGER || type == TK_NK_FLOAT) {
S
slguan 已提交
716 717 718
        t0.type = type;
        t0.n += len;
      }
H
hzcheng 已提交
719 720 721
    }
  }

722
  t0.z = (char*) str + (*i);
S
slguan 已提交
723 724 725
  *i += t0.n;

  return t0;
H
hzcheng 已提交
726 727
}

728
bool taosIsKeyWordToken(const char* z, int32_t len) {
729
  return (tKeywordCode((char*)z, len) != TK_NK_ID);
730
}
H
Haojun Liao 已提交
731 732

/*
 * Release the keyword hash table, if there is one. The global pointer is
 * swapped to 0 with a compare-and-exchange, and the table is destroyed only by
 * the caller whose exchange succeeded.
 */
void taosCleanupKeywordsTable() {
  void* table = keywordHashTable;
  if (table == NULL) {
    return;
  }

  if (atomic_val_compare_exchange_ptr(&keywordHashTable, table, 0) != table) {
    return;  // the pointer changed in the meantime; nothing for this caller to free
  }

  taosHashCleanup(table);
}
738

H
Haojun Liao 已提交
739
/*
 * Copy the text of pToken into buf as a NUL-terminated string and return a
 * token identical to *pToken except that its z points at buf.
 *
 * buf must have room for the token text plus the terminator, i.e. len must be
 * greater than pToken->n; this is enforced with assert().
 */
SToken taosTokenDup(SToken* pToken, char* buf, int32_t len) {
  // Compare in a wider signed type: the token length appears to be unsigned, and a
  // mixed signed/unsigned comparison would let a negative len pass the check.
  assert(pToken != NULL && buf != NULL && (int64_t)len > (int64_t)pToken->n);

  strncpy(buf, pToken->z, pToken->n);
  buf[pToken->n] = 0;

  SToken token = *pToken;
  token.z = buf;
  return token;
}