ttokenizer.c 17.7 KB
Newer Older
H
hzcheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

H
hjxilinx 已提交
16 17
#include "os.h"

H
Haojun Liao 已提交
18
#include "thash.h"
H
hjxilinx 已提交
19
#include "taosdef.h"
20
#include "ttoken.h"
21
#include "ttokendef.h"
H
hzcheng 已提交
22

S
slguan 已提交
23
/*
 * One SQL keyword. Entries live in keywordTable and are looked up through a
 * hash table keyed by the keyword text.
 *
 * Field order matters: keywordTable uses positional initializers of the form
 * {name, type} and leaves len to be filled in at start-up.
 */
typedef struct SKeyword {
  const char* name;  // keyword text as stored in keywordTable
  uint16_t    type;  // token code (a TK_* value) reported for this keyword
  uint8_t     len;   // strlen(name), computed by doInitKeywordsTable()
} SKeyword;

S
slguan 已提交
30
// keywords in sql string
H
hzcheng 已提交
31
static SKeyword keywordTable[] = {
S
slguan 已提交
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
    {"ID",           TK_ID},
    {"BOOL",         TK_BOOL},
    {"TINYINT",      TK_TINYINT},
    {"SMALLINT",     TK_SMALLINT},
    {"INTEGER",      TK_INTEGER},
    {"INT",          TK_INTEGER},
    {"BIGINT",       TK_BIGINT},
    {"FLOAT",        TK_FLOAT},
    {"DOUBLE",       TK_DOUBLE},
    {"STRING",       TK_STRING},
    {"TIMESTAMP",    TK_TIMESTAMP},
    {"BINARY",       TK_BINARY},
    {"NCHAR",        TK_NCHAR},
    {"OR",           TK_OR},
    {"AND",          TK_AND},
    {"NOT",          TK_NOT},
    {"EQ",           TK_EQ},
    {"NE",           TK_NE},
    {"ISNULL",       TK_ISNULL},
    {"NOTNULL",      TK_NOTNULL},
    {"IS",           TK_IS},
    {"LIKE",         TK_LIKE},
54
    {"MATCH",        TK_MATCH},
S
slguan 已提交
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
    {"GLOB",         TK_GLOB},
    {"BETWEEN",      TK_BETWEEN},
    {"IN",           TK_IN},
    {"GT",           TK_GT},
    {"GE",           TK_GE},
    {"LT",           TK_LT},
    {"LE",           TK_LE},
    {"BITAND",       TK_BITAND},
    {"BITOR",        TK_BITOR},
    {"LSHIFT",       TK_LSHIFT},
    {"RSHIFT",       TK_RSHIFT},
    {"PLUS",         TK_PLUS},
    {"MINUS",        TK_MINUS},
    {"DIVIDE",       TK_DIVIDE},
    {"TIMES",        TK_TIMES},
    {"STAR",         TK_STAR},
    {"SLASH",        TK_SLASH},
    {"REM ",         TK_REM},
73
    {"||",       TK_CONCAT},
S
slguan 已提交
74 75 76 77 78 79 80 81 82 83 84 85 86
    {"UMINUS",       TK_UMINUS},
    {"UPLUS",        TK_UPLUS},
    {"BITNOT",       TK_BITNOT},
    {"SHOW",         TK_SHOW},
    {"DATABASES",    TK_DATABASES},
    {"MNODES",       TK_MNODES},
    {"DNODES",       TK_DNODES},
    {"ACCOUNTS",     TK_ACCOUNTS},
    {"USERS",        TK_USERS},
    {"MODULES",      TK_MODULES},
    {"QUERIES",      TK_QUERIES},
    {"CONNECTIONS",  TK_CONNECTIONS},
    {"STREAMS",      TK_STREAMS},
H
Haojun Liao 已提交
87
    {"VARIABLES",    TK_VARIABLES},
S
slguan 已提交
88 89 90 91 92 93 94 95 96 97 98 99 100 101
    {"SCORES",       TK_SCORES},
    {"GRANTS",       TK_GRANTS},
    {"DOT",          TK_DOT},
    {"TABLES",       TK_TABLES},
    {"STABLES",      TK_STABLES},
    {"VGROUPS",      TK_VGROUPS},
    {"DROP",         TK_DROP},
    {"TABLE",        TK_TABLE},
    {"DATABASE",     TK_DATABASE},
    {"DNODE",        TK_DNODE},
    {"USER",         TK_USER},
    {"ACCOUNT",      TK_ACCOUNT},
    {"USE",          TK_USE},
    {"DESCRIBE",     TK_DESCRIBE},
102
    {"SYNCDB",       TK_SYNCDB},
S
slguan 已提交
103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118
    {"ALTER",        TK_ALTER},
    {"PASS",         TK_PASS},
    {"PRIVILEGE",    TK_PRIVILEGE},
    {"LOCAL",        TK_LOCAL},
    {"IF",           TK_IF},
    {"EXISTS",       TK_EXISTS},
    {"CREATE",       TK_CREATE},
    {"PPS",          TK_PPS},
    {"TSERIES",      TK_TSERIES},
    {"DBS",          TK_DBS},
    {"STORAGE",      TK_STORAGE},
    {"QTIME",        TK_QTIME},
    {"CONNS",        TK_CONNS},
    {"STATE",        TK_STATE},
    {"KEEP",         TK_KEEP},
    {"REPLICA",      TK_REPLICA},
119
    {"QUORUM",       TK_QUORUM},
S
slguan 已提交
120
    {"DAYS",         TK_DAYS},
H
hjxilinx 已提交
121 122 123
    {"MINROWS",      TK_MINROWS},
    {"MAXROWS",      TK_MAXROWS},
    {"BLOCKS",       TK_BLOCKS},
S
slguan 已提交
124 125
    {"CACHE",        TK_CACHE},
    {"CTIME",        TK_CTIME},
H
hjxilinx 已提交
126
    {"WAL",          TK_WAL},
陶建辉(Jeff)'s avatar
sql.y  
陶建辉(Jeff) 已提交
127
    {"FSYNC",        TK_FSYNC},
S
slguan 已提交
128 129 130 131
    {"COMP",         TK_COMP},
    {"PRECISION",    TK_PRECISION},
    {"LP",           TK_LP},
    {"RP",           TK_RP},
132
    {"UNSIGNED",     TK_UNSIGNED},
S
slguan 已提交
133 134 135 136 137 138
    {"TAGS",         TK_TAGS},
    {"USING",        TK_USING},
    {"AS",           TK_AS},
    {"COMMA",        TK_COMMA},
    {"NULL",         TK_NULL},
    {"SELECT",       TK_SELECT},
139
    {"EVERY",        TK_EVERY},
S
slguan 已提交
140 141 142
    {"FROM",         TK_FROM},
    {"VARIABLE",     TK_VARIABLE},
    {"INTERVAL",     TK_INTERVAL},
143
    {"SESSION",      TK_SESSION},
144
    {"STATE_WINDOW", TK_STATE_WINDOW},
S
slguan 已提交
145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
    {"FILL",         TK_FILL},
    {"SLIDING",      TK_SLIDING},
    {"ORDER",        TK_ORDER},
    {"BY",           TK_BY},
    {"ASC",          TK_ASC},
    {"DESC",         TK_DESC},
    {"GROUP",        TK_GROUP},
    {"HAVING",       TK_HAVING},
    {"LIMIT",        TK_LIMIT},
    {"OFFSET",       TK_OFFSET},
    {"SLIMIT",       TK_SLIMIT},
    {"SOFFSET",      TK_SOFFSET},
    {"WHERE",        TK_WHERE},
    {"NOW",          TK_NOW},
    {"INSERT",       TK_INSERT},
    {"INTO",         TK_INTO},
    {"VALUES",       TK_VALUES},
Y
yihaoDeng 已提交
162
    {"UPDATE",       TK_UPDATE},
S
slguan 已提交
163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195
    {"RESET",        TK_RESET},
    {"QUERY",        TK_QUERY},
    {"ADD",          TK_ADD},
    {"COLUMN",       TK_COLUMN},
    {"TAG",          TK_TAG},
    {"CHANGE",       TK_CHANGE},
    {"SET",          TK_SET},
    {"KILL",         TK_KILL},
    {"CONNECTION",   TK_CONNECTION},
    {"COLON",        TK_COLON},
    {"STREAM",       TK_STREAM},
    {"ABORT",        TK_ABORT},
    {"AFTER",        TK_AFTER},
    {"ATTACH",       TK_ATTACH},
    {"BEFORE",       TK_BEFORE},
    {"BEGIN",        TK_BEGIN},
    {"CASCADE",      TK_CASCADE},
    {"CLUSTER",      TK_CLUSTER},
    {"CONFLICT",     TK_CONFLICT},
    {"COPY",         TK_COPY},
    {"DEFERRED",     TK_DEFERRED},
    {"DELIMITERS",   TK_DELIMITERS},
    {"DETACH",       TK_DETACH},
    {"EACH",         TK_EACH},
    {"END",          TK_END},
    {"EXPLAIN",      TK_EXPLAIN},
    {"FAIL",         TK_FAIL},
    {"FOR",          TK_FOR},
    {"IGNORE",       TK_IGNORE},
    {"IMMEDIATE",    TK_IMMEDIATE},
    {"INITIALLY",    TK_INITIALLY},
    {"INSTEAD",      TK_INSTEAD},
    {"MATCH",        TK_MATCH},
196
    {"NMATCH",       TK_NMATCH},
S
slguan 已提交
197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
    {"KEY",          TK_KEY},
    {"OF",           TK_OF},
    {"RAISE",        TK_RAISE},
    {"REPLACE",      TK_REPLACE},
    {"RESTRICT",     TK_RESTRICT},
    {"ROW",          TK_ROW},
    {"STATEMENT",    TK_STATEMENT},
    {"TRIGGER",      TK_TRIGGER},
    {"VIEW",         TK_VIEW},
    {"ALL",          TK_ALL},
    {"SEMI",         TK_SEMI},
    {"NONE",         TK_NONE},
    {"PREV",         TK_PREV},
    {"LINEAR",       TK_LINEAR},
    {"IMPORT",       TK_IMPORT},
    {"TBNAME",       TK_TBNAME},
    {"JOIN",         TK_JOIN},
    {"STABLE",       TK_STABLE},
    {"FILE",         TK_FILE},
L
lihui 已提交
216
    {"VNODES",       TK_VNODES},
217
    {"UNION",        TK_UNION},
D
dapan1121 已提交
218
    {"CACHELAST",    TK_CACHELAST},
dengyihao's avatar
TD-2571  
dengyihao 已提交
219
    {"DISTINCT",     TK_DISTINCT},
220
//    {"PARTITIONS",   TK_PARTITIONS},
D
dapan1121 已提交
221
    {"TOPIC",        TK_TOPIC},
D
dapan1121 已提交
222
    {"TOPICS",       TK_TOPICS},
H
Haojun Liao 已提交
223 224
    {"COMPACT",      TK_COMPACT},
    {"MODIFY",       TK_MODIFY},
D
dapan1121 已提交
225
    {"FUNCTION",     TK_FUNCTION},
D
dapan1121 已提交
226
    {"FUNCTIONS",    TK_FUNCTIONS},
227
    {"OUTPUTTYPE",   TK_OUTPUTTYPE},
D
dapan1121 已提交
228
    {"AGGREGATE",    TK_AGGREGATE},
H
Haojun Liao 已提交
229
    {"BUFSIZE",      TK_BUFSIZE},
H
hzcheng 已提交
230 231 232 233 234 235 236 237 238 239 240 241 242 243
};

/*
 * Lookup table of identifier characters, indexed by 7-bit ASCII code.
 * An entry is 1 for '0'-'9' (0x30-0x39), 'A'-'Z' (0x41-0x5A), '_' (0x5F) and
 * 'a'-'z' (0x61-0x7A), and 0 for every other code.
 *
 * The table has only 128 entries, so a byte with the high bit set (>= 0x80)
 * must be rejected before it is used as an index.
 */
static const char isIdChar[] = {
    /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
};

H
Haojun Liao 已提交
244
static void* keywordHashTable = NULL;
H
hzcheng 已提交
245

S
TD-1057  
Shengliang Guan 已提交
246
static void doInitKeywordsTable(void) {
247 248
  int numOfEntries = tListLen(keywordTable);
  
H
Haojun Liao 已提交
249
  keywordHashTable = taosHashInit(numOfEntries, MurmurHash3_32, true, false);
250
  for (int32_t i = 0; i < numOfEntries; i++) {
S
TD-1057  
Shengliang Guan 已提交
251
    keywordTable[i].len = (uint8_t)strlen(keywordTable[i].name);
252
    void* ptr = &keywordTable[i];
H
Haojun Liao 已提交
253
    taosHashPut(keywordHashTable, keywordTable[i].name, keywordTable[i].len, (void*)&ptr, POINTER_BYTES);
H
hzcheng 已提交
254
  }
255 256 257 258
}

static pthread_once_t keywordsHashTableInit = PTHREAD_ONCE_INIT;

259
static int32_t tKeywordCode(const char* z, int n) {
260
  pthread_once(&keywordsHashTableInit, doInitKeywordsTable);
H
hjxilinx 已提交
261 262
  
  char key[512] = {0};
263 264 265
  if (n > tListLen(key)) { // too long token, can not be any other token type
    return TK_ID;
  }
H
hjxilinx 已提交
266
  
H
hzcheng 已提交
267 268
  for (int32_t j = 0; j < n; ++j) {
    if (z[j] >= 'a' && z[j] <= 'z') {
269
      key[j] = (char)(z[j] & 0xDF);  // to uppercase and set the null-terminated
H
hzcheng 已提交
270 271 272 273 274
    } else {
      key[j] = z[j];
    }
  }

D
dapan1121 已提交
275 276 277
  if (keywordHashTable == NULL) {
    return TK_ILLEGAL;
  }
H
Haojun Liao 已提交
278

H
Haojun Liao 已提交
279
  SKeyword** pKey = (SKeyword**)taosHashGet(keywordHashTable, key, n);
H
Haojun Liao 已提交
280
  return (pKey != NULL)? (*pKey)->type:TK_ID;
H
hzcheng 已提交
281 282
}

H
huili 已提交
283
/*
284 285 286
 * Return the length of the token that begins at z[0].
 * Store the token type in *type before returning.
 */
287
uint32_t tGetToken(const char* z, uint32_t* tokenId) {
288
  uint32_t i;
H
hzcheng 已提交
289 290 291 292 293 294 295 296
  switch (*z) {
    case ' ':
    case '\t':
    case '\n':
    case '\f':
    case '\r': {
      for (i = 1; isspace(z[i]); i++) {
      }
H
Haojun Liao 已提交
297
      *tokenId = TK_SPACE;
H
hzcheng 已提交
298 299 300
      return i;
    }
    case ':': {
H
Haojun Liao 已提交
301
      *tokenId = TK_COLON;
H
hzcheng 已提交
302 303 304 305 306 307
      return 1;
    }
    case '-': {
      if (z[1] == '-') {
        for (i = 2; z[i] && z[i] != '\n'; i++) {
        }
H
Haojun Liao 已提交
308
        *tokenId = TK_COMMENT;
H
hzcheng 已提交
309 310
        return i;
      }
H
Haojun Liao 已提交
311
      *tokenId = TK_MINUS;
H
hzcheng 已提交
312 313 314
      return 1;
    }
    case '(': {
H
Haojun Liao 已提交
315
      *tokenId = TK_LP;
H
hzcheng 已提交
316 317 318
      return 1;
    }
    case ')': {
H
Haojun Liao 已提交
319
      *tokenId = TK_RP;
H
hzcheng 已提交
320 321 322
      return 1;
    }
    case ';': {
H
Haojun Liao 已提交
323
      *tokenId = TK_SEMI;
H
hzcheng 已提交
324 325 326
      return 1;
    }
    case '+': {
H
Haojun Liao 已提交
327
      *tokenId = TK_PLUS;
H
hzcheng 已提交
328 329 330
      return 1;
    }
    case '*': {
H
Haojun Liao 已提交
331
      *tokenId = TK_STAR;
H
hzcheng 已提交
332 333 334 335
      return 1;
    }
    case '/': {
      if (z[1] != '*' || z[2] == 0) {
H
Haojun Liao 已提交
336
        *tokenId = TK_SLASH;
H
hzcheng 已提交
337 338 339 340 341
        return 1;
      }
      for (i = 3; z[i] && (z[i] != '/' || z[i - 1] != '*'); i++) {
      }
      if (z[i]) i++;
H
Haojun Liao 已提交
342
      *tokenId = TK_COMMENT;
H
hzcheng 已提交
343 344 345
      return i;
    }
    case '%': {
H
Haojun Liao 已提交
346
      *tokenId = TK_REM;
H
hzcheng 已提交
347 348 349
      return 1;
    }
    case '=': {
H
Haojun Liao 已提交
350
      *tokenId = TK_EQ;
H
hzcheng 已提交
351 352 353 354
      return 1 + (z[1] == '=');
    }
    case '<': {
      if (z[1] == '=') {
H
Haojun Liao 已提交
355
        *tokenId = TK_LE;
H
hzcheng 已提交
356 357
        return 2;
      } else if (z[1] == '>') {
H
Haojun Liao 已提交
358
        *tokenId = TK_NE;
H
hzcheng 已提交
359 360
        return 2;
      } else if (z[1] == '<') {
H
Haojun Liao 已提交
361
        *tokenId = TK_LSHIFT;
H
hzcheng 已提交
362 363
        return 2;
      } else {
H
Haojun Liao 已提交
364
        *tokenId = TK_LT;
H
hzcheng 已提交
365 366 367 368 369
        return 1;
      }
    }
    case '>': {
      if (z[1] == '=') {
H
Haojun Liao 已提交
370
        *tokenId = TK_GE;
H
hzcheng 已提交
371 372
        return 2;
      } else if (z[1] == '>') {
H
Haojun Liao 已提交
373
        *tokenId = TK_RSHIFT;
H
hzcheng 已提交
374 375
        return 2;
      } else {
H
Haojun Liao 已提交
376
        *tokenId = TK_GT;
H
hzcheng 已提交
377 378 379 380 381
        return 1;
      }
    }
    case '!': {
      if (z[1] != '=') {
H
Haojun Liao 已提交
382
        *tokenId = TK_ILLEGAL;
H
hzcheng 已提交
383 384
        return 2;
      } else {
H
Haojun Liao 已提交
385
        *tokenId = TK_NE;
H
hzcheng 已提交
386 387 388 389 390
        return 2;
      }
    }
    case '|': {
      if (z[1] != '|') {
H
Haojun Liao 已提交
391
        *tokenId = TK_BITOR;
H
hzcheng 已提交
392 393
        return 1;
      } else {
H
Haojun Liao 已提交
394
        *tokenId = TK_CONCAT;
H
hzcheng 已提交
395 396 397 398
        return 2;
      }
    }
    case ',': {
H
Haojun Liao 已提交
399
      *tokenId = TK_COMMA;
H
hzcheng 已提交
400 401 402
      return 1;
    }
    case '&': {
H
Haojun Liao 已提交
403
      *tokenId = TK_BITAND;
H
hzcheng 已提交
404 405 406
      return 1;
    }
    case '~': {
H
Haojun Liao 已提交
407
      *tokenId = TK_BITNOT;
H
hzcheng 已提交
408 409
      return 1;
    }
S
slguan 已提交
410
    case '?': {
H
Haojun Liao 已提交
411
      *tokenId = TK_QUESTION;
S
slguan 已提交
412 413
      return 1;
    }
414
    case '`':
H
hzcheng 已提交
415 416
    case '\'':
    case '"': {
S
slguan 已提交
417 418
      int  delim = z[0];
      bool strEnd = false;
H
hzcheng 已提交
419
      for (i = 1; z[i]; i++) {
H
Haojun Liao 已提交
420
        if (z[i] == '\\') {   // ignore the escaped character that follows this backslash
L
[1292]  
lihui 已提交
421 422 423 424
          i++;
          continue;
        }
        
425
        if (z[i] == delim) {
H
hzcheng 已提交
426 427 428
          if (z[i + 1] == delim) {
            i++;
          } else {
H
huili 已提交
429
            strEnd = true;
H
hzcheng 已提交
430 431 432 433
            break;
          }
        }
      }
L
[1292]  
lihui 已提交
434
      
H
hzcheng 已提交
435
      if (z[i]) i++;
H
huili 已提交
436

S
slguan 已提交
437
      if (strEnd) {
438
        *tokenId = (delim == '`')? TK_ID:TK_STRING;
S
slguan 已提交
439 440
        return i;
      }
H
huili 已提交
441

S
slguan 已提交
442
      break;
H
hzcheng 已提交
443 444
    }
    case '.': {
S
slguan 已提交
445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461
      /*
       * handle the the float number with out integer part
       * .123
       * .123e4
       */
      if (isdigit(z[1])) {
        for (i = 2; isdigit(z[i]); i++) {
        }

        if ((z[i] == 'e' || z[i] == 'E') &&
            (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
          i += 2;
          while (isdigit(z[i])) {
            i++;
          }
        }

H
Haojun Liao 已提交
462
        *tokenId = TK_FLOAT;
S
slguan 已提交
463 464
        return i;
      } else {
H
Haojun Liao 已提交
465
        *tokenId = TK_DOT;
S
slguan 已提交
466 467 468 469 470 471 472 473
        return 1;
      }
    }

    case '0': {
      char next = z[1];

      if (next == 'b') { // bin number
H
Haojun Liao 已提交
474
        *tokenId = TK_BIN;
S
slguan 已提交
475 476 477 478 479 480 481 482 483
        for (i = 2; (z[i] == '0' || z[i] == '1'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
      } else if (next == 'x') {  //hex number
H
Haojun Liao 已提交
484
        *tokenId = TK_HEX;
S
slguan 已提交
485 486 487 488 489 490 491 492 493
        for (i = 2; isdigit(z[i]) || (z[i] >= 'a' && z[i] <= 'f') || (z[i] >= 'A' && z[i] <= 'F'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
      }
H
hzcheng 已提交
494 495 496 497 498 499 500 501 502 503
    }
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9': {
H
Haojun Liao 已提交
504
      *tokenId = TK_INTEGER;
H
hzcheng 已提交
505 506 507
      for (i = 1; isdigit(z[i]); i++) {
      }

H
Haojun Liao 已提交
508
      /* here is the 1u/1a/2s/3m/9y */
509
      if ((z[i] == 'b' || z[i] == 'u' || z[i] == 'a' || z[i] == 's' || z[i] == 'm' || z[i] == 'h' || z[i] == 'd' || z[i] == 'n' ||
H
Haojun Liao 已提交
510
           z[i] == 'y' || z[i] == 'w' ||
511
           z[i] == 'B' || z[i] == 'U' || z[i] == 'A' || z[i] == 'S' || z[i] == 'M' || z[i] == 'H' || z[i] == 'D' || z[i] == 'N' ||
H
Haojun Liao 已提交
512
           z[i] == 'Y' || z[i] == 'W') &&
H
hjxilinx 已提交
513
          (isIdChar[(uint8_t)z[i + 1]] == 0)) {
H
Haojun Liao 已提交
514
        *tokenId = TK_VARIABLE;
H
hzcheng 已提交
515 516 517 518 519 520 521 522 523 524
        i += 1;
        return i;
      }

      int32_t seg = 1;
      while (z[i] == '.' && isdigit(z[i + 1])) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
H
Haojun Liao 已提交
525
        *tokenId = TK_FLOAT;
H
hzcheng 已提交
526 527 528 529
        seg++;
      }

      if (seg == 4) {  // ip address
H
Haojun Liao 已提交
530
        *tokenId = TK_IPTOKEN;
H
hzcheng 已提交
531 532 533 534 535 536 537 538 539
        return i;
      }

      if ((z[i] == 'e' || z[i] == 'E') &&
          (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
H
Haojun Liao 已提交
540
        *tokenId = TK_FLOAT;
H
hzcheng 已提交
541 542 543 544 545 546
      }
      return i;
    }
    case '[': {
      for (i = 1; z[i] && z[i - 1] != ']'; i++) {
      }
H
Haojun Liao 已提交
547
      *tokenId = TK_ID;
H
hzcheng 已提交
548 549 550 551 552 553
      return i;
    }
    case 'T':
    case 't':
    case 'F':
    case 'f': {
H
hjxilinx 已提交
554
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t) z[i]]; i++) {
H
hzcheng 已提交
555 556 557
      }

      if ((i == 4 && strncasecmp(z, "true", 4) == 0) || (i == 5 && strncasecmp(z, "false", 5) == 0)) {
H
Haojun Liao 已提交
558
        *tokenId = TK_BOOL;
H
hzcheng 已提交
559 560 561 562
        return i;
      }
    }
    default: {
H
hjxilinx 已提交
563
      if (((*z & 0x80) != 0) || !isIdChar[(uint8_t) *z]) {
H
hzcheng 已提交
564 565
        break;
      }
H
hjxilinx 已提交
566
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t) z[i]]; i++) {
H
hzcheng 已提交
567
      }
568
      *tokenId = tKeywordCode(z, i);
H
hzcheng 已提交
569 570 571 572
      return i;
    }
  }

H
Haojun Liao 已提交
573
  *tokenId = TK_ILLEGAL;
H
hzcheng 已提交
574 575 576
  return 0;
}

H
Haojun Liao 已提交
577
/*
 * Replace one token inside a heap-allocated string with new text.
 *
 * str      in/out: address of the string that contains the token. On success
 *          the old buffer is released and *str points to a newly allocated
 *          string.
 * token    the token to replace; token->z must point into *str and token->n
 *          is the number of bytes to replace.
 * newToken null-terminated replacement text.
 *
 * Returns a token describing the replacement text inside the new string
 * (z and n are set; type is 0). If memory cannot be allocated, *str is left
 * untouched and a token with z == NULL and n == 0 is returned.
 */
SToken tscReplaceStrToken(char **str, SToken *token, const char* newToken) {
  SToken ntoken = {0};

  char  *src = *str;
  size_t nsize = strlen(newToken);
  size_t bsize = (size_t)(token->z - src);  // bytes that precede the token
  size_t size = strlen(src) - token->n + nsize + 1;

  char *dst = calloc(1, size);
  if (dst == NULL) {
    return ntoken;  // keep the caller's original string intact
  }

  // new string = text before the token + replacement + text after the token
  memcpy(dst, src, bsize);
  memcpy(dst + bsize, newToken, nsize);
  strcpy(dst + bsize + nsize, token->z + token->n);

  ntoken.n = (uint32_t)nsize;
  ntoken.z = dst + bsize;

  *str = dst;
  tfree(src);

  return ntoken;
}

598
SToken tStrGetToken(const char* str, int32_t* i, bool isPrevOptr) {
H
Haojun Liao 已提交
599
  SToken t0 = {0};
S
slguan 已提交
600

H
hzcheng 已提交
601 602
  // here we reach the end of sql string, null-terminated string
  if (str[*i] == 0) {
S
slguan 已提交
603 604
    t0.n = 0;
    return t0;
H
hzcheng 已提交
605 606
  }

S
slguan 已提交
607 608 609 610
  // IGNORE TK_SPACE, TK_COMMA, and specified tokens
  while (1) {
    *i += t0.n;

H
Haojun Liao 已提交
611 612 613 614 615 616
    int32_t numOfComma = 0;
    char t = str[*i];
    while (t == ' ' || t == '\n' || t == '\r' || t == '\t' || t == '\f' || t == ',') {
      if (t == ',' && (++numOfComma > 1)) {  // comma only allowed once
        t0.n = 0;
        return t0;
S
slguan 已提交
617
      }
H
Haojun Liao 已提交
618
    
H
Haojun Liao 已提交
619
      t = str[++(*i)];
S
slguan 已提交
620
    }
H
hzcheng 已提交
621

622
    t0.n = tGetToken(&str[*i], &t0.type);
H
Haojun Liao 已提交
623
    break;
S
slguan 已提交
624

H
Haojun Liao 已提交
625 626
    // not support user specfied ignored symbol list
#if 0
H
Haojun Liao 已提交
627
    bool ignore = false;
S
slguan 已提交
628 629
    for (uint32_t k = 0; k < numOfIgnoreToken; k++) {
      if (t0.type == ignoreTokenTypes[k]) {
H
Haojun Liao 已提交
630
        ignore = true;
S
slguan 已提交
631 632 633 634
        break;
      }
    }

H
Haojun Liao 已提交
635
    if (!ignore) {
S
slguan 已提交
636 637
      break;
    }
H
Haojun Liao 已提交
638
#endif
H
hzcheng 已提交
639 640
  }

S
slguan 已提交
641 642 643 644 645 646 647 648 649 650
  if (t0.type == TK_SEMI) {
    t0.n = 0;
    return t0;
  }

  uint32_t type = 0;
  int32_t  len;

  // support parse the 'db.tbl' format, notes: There should be no space on either side of the dot!
  if ('.' == str[*i + t0.n]) {
651
    len = tGetToken(&str[*i + t0.n + 1], &type);
S
slguan 已提交
652 653 654 655 656 657 658 659 660 661 662 663 664 665

    // only id and string are valid
    if ((TK_STRING != t0.type) && (TK_ID != t0.type)) {
      t0.type = TK_ILLEGAL;
      t0.n = 0;

      return t0;
    }

    t0.n += len + 1;

  } else {
    // support parse the -/+number format
    if ((isPrevOptr) && (t0.type == TK_MINUS || t0.type == TK_PLUS)) {
666
      len = tGetToken(&str[*i + t0.n], &type);
S
slguan 已提交
667 668 669 670
      if (type == TK_INTEGER || type == TK_FLOAT) {
        t0.type = type;
        t0.n += len;
      }
H
hzcheng 已提交
671 672 673
    }
  }

S
slguan 已提交
674 675 676 677
  t0.z = str + (*i);
  *i += t0.n;

  return t0;
H
hzcheng 已提交
678 679
}

680 681 682
/*
 * Report whether the len-byte word at z is treated as a keyword.
 * Anything tKeywordCode() does not classify as TK_ID counts, which includes
 * TK_ILLEGAL when the keyword table is unavailable.
 */
bool taosIsKeyWordToken(const char* z, int32_t len) {
  int32_t code = tKeywordCode(z, len);
  return code != TK_ID;
}
H
Haojun Liao 已提交
683 684

/*
 * Release the keyword hash table, if there is one.
 * The global pointer is swapped to 0 with a compare-and-exchange, and only the
 * caller whose exchange succeeded goes on to call taosHashCleanup().
 */
void taosCleanupKeywordsTable() {
  void* pTable = keywordHashTable;
  if (pTable == NULL) {
    return;
  }

  if (atomic_val_compare_exchange_ptr(&keywordHashTable, pTable, 0) != pTable) {
    return;  // the pointer changed in the meantime; nothing for us to free
  }

  taosHashCleanup(pTable);
}
690

H
Haojun Liao 已提交
691
/*
 * Copy the text of a token into a caller-supplied buffer.
 *
 * pToken  token to duplicate; its z/n describe the text to copy.
 * buf     destination buffer.
 * len     size of buf in bytes; must be larger than pToken->n so that the
 *         terminating NUL fits.
 *
 * Returns a copy of *pToken whose z points at buf. The copied text is
 * null-terminated.
 */
SToken taosTokenDup(SToken* pToken, char* buf, int32_t len) {
  // Test the sign first: comparing a negative len directly with the unsigned
  // token length would convert it to a huge value and let it through.
  assert(pToken != NULL && buf != NULL && len > 0 && (uint32_t)len > pToken->n);

  strncpy(buf, pToken->z, pToken->n);
  buf[pToken->n] = 0;

  SToken token = *pToken;
  token.z = buf;
  return token;
}