/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "os.h"

#include "thash.h"
#include "taosdef.h"
#include "ttoken.h"
#include "ttokendef.h"

// One SQL keyword. All keywords of the SQL language are stored in a hash
// table whose values point at entries of this type.
typedef struct SKeyword {
  const char* name;  // keyword text, upper case
  uint16_t    type;  // token type (a TK_* code) reported for this keyword
  uint8_t     len;   // strlen(name); left 0 by the static initializers and filled in when the hash table is built
} SKeyword;

// keywords in sql string
H
hzcheng 已提交
31
static SKeyword keywordTable[] = {
32 33
    // {"ID",           TK_ID},
    // {"BOOL",         TK_BOOL},
34 35
//    {"TINYINT",      TK_TINYINT},
//    {"SMALLINT",     TK_SMALLINT},
36 37
    // {"INTEGER",      TK_INTEGER},
    // {"INT",          TK_INTEGER},
38
//    {"BIGINT",       TK_BIGINT},
39
    // {"FLOAT",        TK_FLOAT},
40
//    {"DOUBLE",       TK_DOUBLE},
41
    // {"STRING",       TK_STRING},
S
slguan 已提交
42
    {"TIMESTAMP",    TK_TIMESTAMP},
43 44
//    {"BINARY",       TK_BINARY},
//    {"NCHAR",        TK_NCHAR},
S
slguan 已提交
45 46 47
    {"OR",           TK_OR},
    {"AND",          TK_AND},
    {"NOT",          TK_NOT},
48 49 50 51
    // {"EQ",           TK_EQ},
    // {"NE",           TK_NE},
    // {"ISNULL",       TK_ISNULL},
    // {"NOTNULL",      TK_NOTNULL},
S
slguan 已提交
52 53
    {"IS",           TK_IS},
    {"LIKE",         TK_LIKE},
54
    {"MATCH",        TK_MATCH},
55
    // {"GLOB",         TK_GLOB},
S
slguan 已提交
56 57
    {"BETWEEN",      TK_BETWEEN},
    {"IN",           TK_IN},
58 59 60 61 62 63 64 65 66
    // {"GT",           TK_GT},
    // {"GE",           TK_GE},
    // {"LT",           TK_LT},
    // {"LE",           TK_LE},
    // {"BITAND",       TK_BITAND},
    // {"BITOR",        TK_BITOR},
    // {"LSHIFT",       TK_LSHIFT},
    // {"RSHIFT",       TK_RSHIFT},
    // {"PLUS",         TK_PLUS},
S
slguan 已提交
67
    {"MINUS",        TK_MINUS},
68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92
    // {"DIVIDE",       TK_DIVIDE},
    // {"TIMES",        TK_TIMES},
    // {"STAR",         TK_STAR},
    // {"SLASH",        TK_SLASH},
    // {"REM ",         TK_REM},
    // {"||",           TK_CONCAT},
    // {"UMINUS",       TK_UMINUS},
    // {"UPLUS",        TK_UPLUS},
    // {"BITNOT",       TK_BITNOT},
    // {"SHOW",         TK_SHOW},
    // {"DATABASES",    TK_DATABASES},
    // {"MNODES",       TK_MNODES},
    // {"DNODES",       TK_DNODES},
    // {"ACCOUNTS",     TK_ACCOUNTS},
    // {"USERS",        TK_USERS},
    // {"MODULES",      TK_MODULES},
    // {"QUERIES",      TK_QUERIES},
    // {"CONNECTIONS",  TK_CONNECTIONS},
    // {"STREAMS",      TK_STREAMS},
    // {"VARIABLES",    TK_VARIABLES},
    // {"SCORES",       TK_SCORES},
    // {"GRANTS",       TK_GRANTS},
    // {"DOT",          TK_DOT},
    // {"TABLES",       TK_TABLES},
    // {"STABLES",      TK_STABLES},
S
slguan 已提交
93
    {"VGROUPS",      TK_VGROUPS},
94 95
    // {"DROP",         TK_DROP},
    // {"TABLE",        TK_TABLE},
S
slguan 已提交
96
    {"DATABASE",     TK_DATABASE},
97 98 99 100 101 102 103 104 105 106
    // {"DNODE",        TK_DNODE},
    // {"USER",         TK_USER},
    // {"ACCOUNT",      TK_ACCOUNT},
    // {"USE",          TK_USE},
    // {"DESCRIBE",     TK_DESCRIBE},
    // {"SYNCDB",       TK_SYNCDB},
    // {"ALTER",        TK_ALTER},
    // {"PASS",         TK_PASS},
    // {"PRIVILEGE",    TK_PRIVILEGE},
    // {"LOCAL",        TK_LOCAL},
S
slguan 已提交
107 108 109
    {"IF",           TK_IF},
    {"EXISTS",       TK_EXISTS},
    {"CREATE",       TK_CREATE},
110 111 112 113 114 115 116
    // {"PPS",          TK_PPS},
    // {"TSERIES",      TK_TSERIES},
    // {"DBS",          TK_DBS},
    // {"STORAGE",      TK_STORAGE},
    // {"QTIME",        TK_QTIME},
    // {"CONNS",        TK_CONNS},
    // {"STATE",        TK_STATE},
S
slguan 已提交
117 118
    {"KEEP",         TK_KEEP},
    {"REPLICA",      TK_REPLICA},
119
    {"QUORUM",       TK_QUORUM},
S
slguan 已提交
120
    {"DAYS",         TK_DAYS},
H
hjxilinx 已提交
121 122 123
    {"MINROWS",      TK_MINROWS},
    {"MAXROWS",      TK_MAXROWS},
    {"BLOCKS",       TK_BLOCKS},
S
slguan 已提交
124
    {"CACHE",        TK_CACHE},
125
    // {"CTIME",        TK_CTIME},
H
hjxilinx 已提交
126
    {"WAL",          TK_WAL},
陶建辉(Jeff)'s avatar
sql.y  
陶建辉(Jeff) 已提交
127
    {"FSYNC",        TK_FSYNC},
S
slguan 已提交
128 129
    {"COMP",         TK_COMP},
    {"PRECISION",    TK_PRECISION},
130 131 132
    // {"LP",           TK_LP},
    // {"RP",           TK_RP},
    // {"UNSIGNED",     TK_UNSIGNED},
S
slguan 已提交
133 134 135
    {"TAGS",         TK_TAGS},
    {"USING",        TK_USING},
    {"AS",           TK_AS},
136
    // {"COMMA",        TK_COMMA},
S
slguan 已提交
137 138
    {"NULL",         TK_NULL},
    {"SELECT",       TK_SELECT},
139
    // {"EVERY",        TK_EVERY},
S
slguan 已提交
140
    {"FROM",         TK_FROM},
141
    // {"VARIABLE",     TK_VARIABLE},
S
slguan 已提交
142
    {"INTERVAL",     TK_INTERVAL},
143
    {"SESSION",      TK_SESSION},
144
    {"STATE_WINDOW", TK_STATE_WINDOW},
S
slguan 已提交
145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
    {"FILL",         TK_FILL},
    {"SLIDING",      TK_SLIDING},
    {"ORDER",        TK_ORDER},
    {"BY",           TK_BY},
    {"ASC",          TK_ASC},
    {"DESC",         TK_DESC},
    {"GROUP",        TK_GROUP},
    {"HAVING",       TK_HAVING},
    {"LIMIT",        TK_LIMIT},
    {"OFFSET",       TK_OFFSET},
    {"SLIMIT",       TK_SLIMIT},
    {"SOFFSET",      TK_SOFFSET},
    {"WHERE",        TK_WHERE},
    {"NOW",          TK_NOW},
    {"INSERT",       TK_INSERT},
    {"INTO",         TK_INTO},
    {"VALUES",       TK_VALUES},
162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194
    // {"UPDATE",       TK_UPDATE},
    // {"RESET",        TK_RESET},
    // {"QUERY",        TK_QUERY},
    // {"ADD",          TK_ADD},
    // {"COLUMN",       TK_COLUMN},
    // {"TAG",          TK_TAG},
    // {"CHANGE",       TK_CHANGE},
    // {"SET",          TK_SET},
    // {"KILL",         TK_KILL},
    // {"CONNECTION",   TK_CONNECTION},
    // {"COLON",        TK_COLON},
    // {"STREAM",       TK_STREAM},
    // {"ABORT",        TK_ABORT},
    // {"AFTER",        TK_AFTER},
    // {"ATTACH",       TK_ATTACH},
    // {"BEFORE",       TK_BEFORE},
    // {"BEGIN",        TK_BEGIN},
    // {"CASCADE",      TK_CASCADE},
    // {"CLUSTER",      TK_CLUSTER},
    // {"CONFLICT",     TK_CONFLICT},
    // {"COPY",         TK_COPY},
    // {"DEFERRED",     TK_DEFERRED},
    // {"DELIMITERS",   TK_DELIMITERS},
    // {"DETACH",       TK_DETACH},
    // {"EACH",         TK_EACH},
    // {"END",          TK_END},
    // {"EXPLAIN",      TK_EXPLAIN},
    // {"FAIL",         TK_FAIL},
    // {"FOR",          TK_FOR},
    // {"IGNORE",       TK_IGNORE},
    // {"IMMEDIATE",    TK_IMMEDIATE},
    // {"INITIALLY",    TK_INITIALLY},
    // {"INSTEAD",      TK_INSTEAD},
S
slguan 已提交
195
    {"MATCH",        TK_MATCH},
196
    {"NMATCH",       TK_NMATCH},
197 198 199 200 201 202 203 204 205
    // {"KEY",          TK_KEY},
    // {"OF",           TK_OF},
    // {"RAISE",        TK_RAISE},
    // {"REPLACE",      TK_REPLACE},
    // {"RESTRICT",     TK_RESTRICT},
    // {"ROW",          TK_ROW},
    // {"STATEMENT",    TK_STATEMENT},
    // {"TRIGGER",      TK_TRIGGER},
    // {"VIEW",         TK_VIEW},
S
slguan 已提交
206
    {"ALL",          TK_ALL},
207
    // {"SEMI",         TK_SEMI},
S
slguan 已提交
208 209 210 211
    {"NONE",         TK_NONE},
    {"PREV",         TK_PREV},
    {"LINEAR",       TK_LINEAR},
    {"IMPORT",       TK_IMPORT},
212
    // {"TBNAME",       TK_TBNAME},
S
slguan 已提交
213
    {"JOIN",         TK_JOIN},
214
    // {"STABLE",       TK_STABLE},
S
slguan 已提交
215
    {"FILE",         TK_FILE},
216
    // {"VNODES",       TK_VNODES},
217
    {"UNION",        TK_UNION},
D
dapan1121 已提交
218
    {"CACHELAST",    TK_CACHELAST},
dengyihao's avatar
TD-2571  
dengyihao 已提交
219
    {"DISTINCT",     TK_DISTINCT},
220
//    {"PARTITIONS",   TK_PARTITIONS},
221 222 223 224 225 226 227 228 229 230 231 232 233
    // {"TOPIC",        TK_TOPIC},
    // {"TOPICS",       TK_TOPICS},
    // {"COMPACT",      TK_COMPACT},
    // {"MODIFY",       TK_MODIFY},
    // {"FUNCTION",     TK_FUNCTION},
    // {"FUNCTIONS",    TK_FUNCTIONS},
    // {"OUTPUTTYPE",   TK_OUTPUTTYPE},
    // {"AGGREGATE",    TK_AGGREGATE},
    // {"BUFSIZE",      TK_BUFSIZE},
    // {"PORT",         TK_PORT},
    {"INNER",        TK_INNER},
    {"ON",           TK_ON},
    // {"MODE",         TK_MODE},
H
hzcheng 已提交
234 235 236 237 238 239 240 241 242 243 244 245 246 247
};

/*
 * Lookup table: isIdChar[c] is 1 when byte c may appear inside an identifier
 * (ASCII digits, ASCII letters and '_'), 0 otherwise.
 *
 * The table has one entry per possible byte value so that indexing it with
 * any uint8_t is in bounds; entries 0x80-0xFF are not listed and are
 * therefore zero-initialized (never identifier characters).
 */
static const char isIdChar[256] = {
    /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
};

static void* keywordHashTable = NULL;
H
hzcheng 已提交
249

S
TD-1057  
Shengliang Guan 已提交
250
static void doInitKeywordsTable(void) {
251 252
  int numOfEntries = tListLen(keywordTable);
  
H
Haojun Liao 已提交
253
  keywordHashTable = taosHashInit(numOfEntries, MurmurHash3_32, true, false);
254
  for (int32_t i = 0; i < numOfEntries; i++) {
S
TD-1057  
Shengliang Guan 已提交
255
    keywordTable[i].len = (uint8_t)strlen(keywordTable[i].name);
256
    void* ptr = &keywordTable[i];
H
Haojun Liao 已提交
257
    taosHashPut(keywordHashTable, keywordTable[i].name, keywordTable[i].len, (void*)&ptr, POINTER_BYTES);
H
hzcheng 已提交
258
  }
259 260 261 262
}

static pthread_once_t keywordsHashTableInit = PTHREAD_ONCE_INIT;

263
static int32_t tKeywordCode(const char* z, int n) {
264
  pthread_once(&keywordsHashTableInit, doInitKeywordsTable);
H
hjxilinx 已提交
265 266
  
  char key[512] = {0};
267
  if (n > tListLen(key)) { // too long token, can not be any other token type
268
    return TK_NK_ID;
269
  }
H
hjxilinx 已提交
270
  
H
hzcheng 已提交
271 272
  for (int32_t j = 0; j < n; ++j) {
    if (z[j] >= 'a' && z[j] <= 'z') {
273
      key[j] = (char)(z[j] & 0xDF);  // to uppercase and set the null-terminated
H
hzcheng 已提交
274 275 276 277 278
    } else {
      key[j] = z[j];
    }
  }

D
dapan1121 已提交
279 280 281
  if (keywordHashTable == NULL) {
    return TK_ILLEGAL;
  }
H
Haojun Liao 已提交
282

H
Haojun Liao 已提交
283
  SKeyword** pKey = (SKeyword**)taosHashGet(keywordHashTable, key, n);
284
  return (pKey != NULL)? (*pKey)->type:TK_NK_ID;
H
hzcheng 已提交
285 286
}

H
huili 已提交
287
/*
288 289 290
 * Return the length of the token that begins at z[0].
 * Store the token type in *type before returning.
 */
291
uint32_t tGetToken(const char* z, uint32_t* tokenId) {
292
  uint32_t i;
H
hzcheng 已提交
293 294 295 296 297 298 299 300
  switch (*z) {
    case ' ':
    case '\t':
    case '\n':
    case '\f':
    case '\r': {
      for (i = 1; isspace(z[i]); i++) {
      }
H
Haojun Liao 已提交
301
      *tokenId = TK_SPACE;
H
hzcheng 已提交
302 303 304
      return i;
    }
    case ':': {
305
      *tokenId = TK_NK_COLON;
H
hzcheng 已提交
306 307 308 309 310 311
      return 1;
    }
    case '-': {
      if (z[1] == '-') {
        for (i = 2; z[i] && z[i] != '\n'; i++) {
        }
H
Haojun Liao 已提交
312
        *tokenId = TK_COMMENT;
H
hzcheng 已提交
313 314
        return i;
      }
H
Haojun Liao 已提交
315
      *tokenId = TK_MINUS;
H
hzcheng 已提交
316 317 318
      return 1;
    }
    case '(': {
319
      *tokenId = TK_NK_LP;
H
hzcheng 已提交
320 321 322
      return 1;
    }
    case ')': {
323
      *tokenId = TK_NK_RP;
H
hzcheng 已提交
324 325 326
      return 1;
    }
    case ';': {
H
Haojun Liao 已提交
327
      *tokenId = TK_SEMI;
H
hzcheng 已提交
328 329 330
      return 1;
    }
    case '+': {
331
      *tokenId = TK_NK_PLUS;
H
hzcheng 已提交
332 333 334
      return 1;
    }
    case '*': {
335
      *tokenId = TK_NK_STAR;
H
hzcheng 已提交
336 337 338 339
      return 1;
    }
    case '/': {
      if (z[1] != '*' || z[2] == 0) {
340
        *tokenId = TK_NK_SLASH;
H
hzcheng 已提交
341 342 343 344 345
        return 1;
      }
      for (i = 3; z[i] && (z[i] != '/' || z[i - 1] != '*'); i++) {
      }
      if (z[i]) i++;
H
Haojun Liao 已提交
346
      *tokenId = TK_COMMENT;
H
hzcheng 已提交
347 348 349
      return i;
    }
    case '%': {
350
      *tokenId = TK_NK_REM;
H
hzcheng 已提交
351 352 353
      return 1;
    }
    case '=': {
354
      *tokenId = TK_NK_EQ;
H
hzcheng 已提交
355 356 357 358
      return 1 + (z[1] == '=');
    }
    case '<': {
      if (z[1] == '=') {
359
        *tokenId = TK_NK_LE;
H
hzcheng 已提交
360 361
        return 2;
      } else if (z[1] == '>') {
362
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
363 364
        return 2;
      } else if (z[1] == '<') {
365
        *tokenId = TK_NK_LSHIFT;
H
hzcheng 已提交
366 367
        return 2;
      } else {
368
        *tokenId = TK_NK_LT;
H
hzcheng 已提交
369 370 371 372 373
        return 1;
      }
    }
    case '>': {
      if (z[1] == '=') {
374
        *tokenId = TK_NK_GE;
H
hzcheng 已提交
375 376
        return 2;
      } else if (z[1] == '>') {
377
        *tokenId = TK_NK_RSHIFT;
H
hzcheng 已提交
378 379
        return 2;
      } else {
380
        *tokenId = TK_NK_GT;
H
hzcheng 已提交
381 382 383 384 385
        return 1;
      }
    }
    case '!': {
      if (z[1] != '=') {
H
Haojun Liao 已提交
386
        *tokenId = TK_ILLEGAL;
H
hzcheng 已提交
387 388
        return 2;
      } else {
389
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
390 391 392 393 394
        return 2;
      }
    }
    case '|': {
      if (z[1] != '|') {
395
        *tokenId = TK_NK_BITOR;
H
hzcheng 已提交
396 397
        return 1;
      } else {
398
        *tokenId = TK_NK_CONCAT;
H
hzcheng 已提交
399 400 401 402
        return 2;
      }
    }
    case ',': {
403
      *tokenId = TK_NK_COMMA;
H
hzcheng 已提交
404 405 406
      return 1;
    }
    case '&': {
407
      *tokenId = TK_NK_BITAND;
H
hzcheng 已提交
408 409 410
      return 1;
    }
    case '~': {
411
      *tokenId = TK_NK_BITNOT;
H
hzcheng 已提交
412 413
      return 1;
    }
S
slguan 已提交
414
    case '?': {
H
Haojun Liao 已提交
415
      *tokenId = TK_QUESTION;
S
slguan 已提交
416 417
      return 1;
    }
418
    case '`':
H
hzcheng 已提交
419 420
    case '\'':
    case '"': {
S
slguan 已提交
421 422
      int  delim = z[0];
      bool strEnd = false;
H
hzcheng 已提交
423
      for (i = 1; z[i]; i++) {
H
Haojun Liao 已提交
424
        if (z[i] == '\\') {   // ignore the escaped character that follows this backslash
L
[1292]  
lihui 已提交
425 426 427 428
          i++;
          continue;
        }
        
429
        if (z[i] == delim) {
H
hzcheng 已提交
430 431 432
          if (z[i + 1] == delim) {
            i++;
          } else {
H
huili 已提交
433
            strEnd = true;
H
hzcheng 已提交
434 435 436 437
            break;
          }
        }
      }
L
[1292]  
lihui 已提交
438
      
H
hzcheng 已提交
439
      if (z[i]) i++;
H
huili 已提交
440

S
slguan 已提交
441
      if (strEnd) {
442
        *tokenId = (delim == '`')? TK_NK_ID:TK_NK_STRING;
S
slguan 已提交
443 444
        return i;
      }
H
huili 已提交
445

S
slguan 已提交
446
      break;
H
hzcheng 已提交
447 448
    }
    case '.': {
S
slguan 已提交
449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465
      /*
       * handle the the float number with out integer part
       * .123
       * .123e4
       */
      if (isdigit(z[1])) {
        for (i = 2; isdigit(z[i]); i++) {
        }

        if ((z[i] == 'e' || z[i] == 'E') &&
            (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
          i += 2;
          while (isdigit(z[i])) {
            i++;
          }
        }

466
        *tokenId = TK_NK_FLOAT;
S
slguan 已提交
467 468
        return i;
      } else {
469
        *tokenId = TK_NK_DOT;
S
slguan 已提交
470 471 472 473 474 475 476 477
        return 1;
      }
    }

    case '0': {
      char next = z[1];

      if (next == 'b') { // bin number
H
Haojun Liao 已提交
478
        *tokenId = TK_BIN;
S
slguan 已提交
479 480 481 482 483 484 485 486 487
        for (i = 2; (z[i] == '0' || z[i] == '1'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
      } else if (next == 'x') {  //hex number
H
Haojun Liao 已提交
488
        *tokenId = TK_HEX;
S
slguan 已提交
489 490 491 492 493 494 495 496 497
        for (i = 2; isdigit(z[i]) || (z[i] >= 'a' && z[i] <= 'f') || (z[i] >= 'A' && z[i] <= 'F'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
      }
H
hzcheng 已提交
498 499 500 501 502 503 504 505 506 507
    }
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9': {
508
      *tokenId = TK_NK_INTEGER;
H
hzcheng 已提交
509 510 511
      for (i = 1; isdigit(z[i]); i++) {
      }

H
Haojun Liao 已提交
512
      /* here is the 1u/1a/2s/3m/9y */
513
      if ((z[i] == 'b' || z[i] == 'u' || z[i] == 'a' || z[i] == 's' || z[i] == 'm' || z[i] == 'h' || z[i] == 'd' || z[i] == 'n' ||
H
Haojun Liao 已提交
514
           z[i] == 'y' || z[i] == 'w' ||
515
           z[i] == 'B' || z[i] == 'U' || z[i] == 'A' || z[i] == 'S' || z[i] == 'M' || z[i] == 'H' || z[i] == 'D' || z[i] == 'N' ||
H
Haojun Liao 已提交
516
           z[i] == 'Y' || z[i] == 'W') &&
H
hjxilinx 已提交
517
          (isIdChar[(uint8_t)z[i + 1]] == 0)) {
518
        *tokenId = TK_NK_VARIABLE;
H
hzcheng 已提交
519 520 521 522 523 524 525 526 527 528
        i += 1;
        return i;
      }

      int32_t seg = 1;
      while (z[i] == '.' && isdigit(z[i + 1])) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
529
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
530 531 532 533
        seg++;
      }

      if (seg == 4) {  // ip address
H
Haojun Liao 已提交
534
        *tokenId = TK_IPTOKEN;
H
hzcheng 已提交
535 536 537 538 539 540 541 542 543
        return i;
      }

      if ((z[i] == 'e' || z[i] == 'E') &&
          (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
544
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
545 546 547 548 549 550
      }
      return i;
    }
    case '[': {
      for (i = 1; z[i] && z[i - 1] != ']'; i++) {
      }
551
      *tokenId = TK_NK_ID;
H
hzcheng 已提交
552 553 554 555 556 557
      return i;
    }
    case 'T':
    case 't':
    case 'F':
    case 'f': {
H
hjxilinx 已提交
558
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t) z[i]]; i++) {
H
hzcheng 已提交
559 560 561
      }

      if ((i == 4 && strncasecmp(z, "true", 4) == 0) || (i == 5 && strncasecmp(z, "false", 5) == 0)) {
562
        *tokenId = TK_NK_BOOL;
H
hzcheng 已提交
563 564 565 566
        return i;
      }
    }
    default: {
H
hjxilinx 已提交
567
      if (((*z & 0x80) != 0) || !isIdChar[(uint8_t) *z]) {
H
hzcheng 已提交
568 569
        break;
      }
H
hjxilinx 已提交
570
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t) z[i]]; i++) {
H
hzcheng 已提交
571
      }
572
      *tokenId = tKeywordCode(z, i);
H
hzcheng 已提交
573 574 575 576
      return i;
    }
  }

H
Haojun Liao 已提交
577
  *tokenId = TK_ILLEGAL;
H
hzcheng 已提交
578 579 580
  return 0;
}

H
Haojun Liao 已提交
581
/*
 * Replace one token inside a heap-allocated string with new text.
 *
 * A new buffer is allocated holding: the part of *str before the token, then
 * newToken, then the part of *str after the token. On success *str is set to
 * the new buffer and the old buffer is released.
 *
 * @param str       in/out: the string that contains the token; must be a
 *                  buffer that tfree() can release
 * @param token     the token to replace; token->z must point into *str
 * @param newToken  null-terminated replacement text
 * @return          a token whose z points at the replacement text inside the
 *                  new buffer and whose n is its length; the remaining
 *                  fields are zero. If the allocation fails, *str is left
 *                  untouched and a zeroed token (z == NULL, n == 0) is
 *                  returned.
 */
SToken tscReplaceStrToken(char **str, SToken *token, const char* newToken) {
  char       *src = *str;
  const char *tail = token->z + token->n;       // text following the replaced token
  size_t      headLen = (size_t)(token->z - src);  // bytes preceding the token
  size_t      newLen = strlen(newToken);
  size_t      tailLen = strlen(tail);
  SToken      ntoken = {0};

  char *dst = calloc(1, headLen + newLen + tailLen + 1);
  if (dst == NULL) {
    return ntoken;
  }

  // calloc zero-fills the buffer, so the result is already null-terminated.
  memcpy(dst, src, headLen);
  memcpy(dst + headLen, newToken, newLen);
  memcpy(dst + headLen + newLen, tail, tailLen);

  *str = dst;
  ntoken.n = (uint32_t)newLen;
  ntoken.z = dst + headLen;

  tfree(src);

  return ntoken;
}

SToken tStrGetToken(const char* str, int32_t* i, bool isPrevOptr) {
H
Haojun Liao 已提交
603
  SToken t0 = {0};
S
slguan 已提交
604

H
hzcheng 已提交
605 606
  // here we reach the end of sql string, null-terminated string
  if (str[*i] == 0) {
S
slguan 已提交
607 608
    t0.n = 0;
    return t0;
H
hzcheng 已提交
609 610
  }

S
slguan 已提交
611 612 613 614
  // IGNORE TK_SPACE, TK_COMMA, and specified tokens
  while (1) {
    *i += t0.n;

H
Haojun Liao 已提交
615 616 617 618 619 620
    int32_t numOfComma = 0;
    char t = str[*i];
    while (t == ' ' || t == '\n' || t == '\r' || t == '\t' || t == '\f' || t == ',') {
      if (t == ',' && (++numOfComma > 1)) {  // comma only allowed once
        t0.n = 0;
        return t0;
S
slguan 已提交
621
      }
H
Haojun Liao 已提交
622
    
H
Haojun Liao 已提交
623
      t = str[++(*i)];
S
slguan 已提交
624
    }
H
hzcheng 已提交
625

626
    t0.n = tGetToken(&str[*i], &t0.type);
H
Haojun Liao 已提交
627
    break;
S
slguan 已提交
628

H
Haojun Liao 已提交
629 630
    // not support user specfied ignored symbol list
#if 0
H
Haojun Liao 已提交
631
    bool ignore = false;
S
slguan 已提交
632 633
    for (uint32_t k = 0; k < numOfIgnoreToken; k++) {
      if (t0.type == ignoreTokenTypes[k]) {
H
Haojun Liao 已提交
634
        ignore = true;
S
slguan 已提交
635 636 637 638
        break;
      }
    }

H
Haojun Liao 已提交
639
    if (!ignore) {
S
slguan 已提交
640 641
      break;
    }
H
Haojun Liao 已提交
642
#endif
H
hzcheng 已提交
643 644
  }

S
slguan 已提交
645 646 647 648 649 650 651 652 653 654
  if (t0.type == TK_SEMI) {
    t0.n = 0;
    return t0;
  }

  uint32_t type = 0;
  int32_t  len;

  // support parse the 'db.tbl' format, notes: There should be no space on either side of the dot!
  if ('.' == str[*i + t0.n]) {
655
    len = tGetToken(&str[*i + t0.n + 1], &type);
S
slguan 已提交
656 657

    // only id and string are valid
658
    if ((TK_NK_STRING != t0.type) && (TK_NK_ID != t0.type)) {
S
slguan 已提交
659 660 661 662 663 664 665 666 667 668
      t0.type = TK_ILLEGAL;
      t0.n = 0;

      return t0;
    }

    t0.n += len + 1;

  } else {
    // support parse the -/+number format
669
    if ((isPrevOptr) && (t0.type == TK_MINUS || t0.type == TK_NK_PLUS)) {
670
      len = tGetToken(&str[*i + t0.n], &type);
671
      if (type == TK_NK_INTEGER || type == TK_NK_FLOAT) {
S
slguan 已提交
672 673 674
        t0.type = type;
        t0.n += len;
      }
H
hzcheng 已提交
675 676 677
    }
  }

678
  t0.z = (char*) str + (*i);
S
slguan 已提交
679 680 681
  *i += t0.n;

  return t0;
H
hzcheng 已提交
682 683
}

684
bool taosIsKeyWordToken(const char* z, int32_t len) {
685
  return (tKeywordCode((char*)z, len) != TK_NK_ID);
686
}
H
Haojun Liao 已提交
687 688

/*
 * Release the keyword hash table, if there is one.
 *
 * The table pointer is swapped to NULL with a compare-and-exchange and the
 * table is destroyed only by the caller whose exchange returned the pointer
 * it had read.
 * NOTE(review): presumably atomic_val_compare_exchange_ptr returns the
 * previous value, so that concurrent callers cannot both free the table;
 * confirm against its definition.
 */
void taosCleanupKeywordsTable(void) {
  void* m = keywordHashTable;
  if (m != NULL && atomic_val_compare_exchange_ptr(&keywordHashTable, m, 0) == m) {
    taosHashCleanup(m);
  }
}

/*
 * Copy the text of a token into a caller-supplied buffer.
 *
 * @param pToken  token to copy; must not be NULL
 * @param buf     destination buffer; must not be NULL
 * @param len     size of buf in bytes; must be larger than pToken->n so that
 *                the text and its terminating NUL fit
 * @return        a copy of *pToken whose z points at buf; buf holds the
 *                token text followed by a terminating NUL
 */
SToken taosTokenDup(SToken* pToken, char* buf, int32_t len) {
  // Check the sign first: comparing a negative len against the unsigned token length would wrongly pass.
  assert(pToken != NULL && buf != NULL && len > 0 && (uint32_t)len > pToken->n);

  strncpy(buf, pToken->z, pToken->n);
  buf[pToken->n] = 0;  // strncpy does not terminate when it copies the full n bytes

  SToken token = *pToken;
  token.z = buf;
  return token;
}