parTokenizer.c 19.2 KB
Newer Older
H
hzcheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

H
hjxilinx 已提交
16
#include "os.h"
X
Xiaoyu Wang 已提交
17
#include "parToken.h"
H
Haojun Liao 已提交
18
#include "thash.h"
H
hjxilinx 已提交
19
#include "taosdef.h"
20
#include "ttokendef.h"
H
hzcheng 已提交
21

S
slguan 已提交
22
// All the keywords of the SQL language are stored in a hash table.
// Each SKeyword describes one entry of that table.
typedef struct SKeyword {
  const char* name;  // keyword text, spelled in upper case in keywordTable
  uint16_t    type;  // token type code (a TK_* constant)
  uint8_t     len;   // strlen(name); filled in when the keyword hash table is built
} SKeyword;

S
slguan 已提交
29
// keywords in sql string
H
hzcheng 已提交
30
static SKeyword keywordTable[] = {
31
    {"ACCOUNT",       TK_ACCOUNT},
32
    {"ALL",           TK_ALL},
33
    {"ALTER",         TK_ALTER},
34
    {"ANALYZE",       TK_ANALYZE},
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
    {"AND",           TK_AND},
    {"AS",            TK_AS},
    {"ASC",           TK_ASC},
    {"BETWEEN",       TK_BETWEEN},
    {"BINARY",        TK_BINARY},
    {"BIGINT",        TK_BIGINT},
    {"BLOCKS",        TK_BLOCKS},
    {"BOOL",          TK_BOOL},
    {"BY",            TK_BY},
    {"CACHE",         TK_CACHE},
    {"CACHELAST",     TK_CACHELAST},
    {"COMMENT",       TK_COMMENT},
    {"COMP",          TK_COMP},
    {"CREATE",        TK_CREATE},
    {"DATABASE",      TK_DATABASE},
    {"DATABASES",     TK_DATABASES},
    {"DAYS",          TK_DAYS},
X
Xiaoyu Wang 已提交
52
    {"DELAY",         TK_DELAY},
53 54
    {"DESC",          TK_DESC},
    {"DISTINCT",      TK_DISTINCT},
55 56
    {"DNODE",         TK_DNODE},
    {"DNODES",        TK_DNODES},
57
    {"DOUBLE",        TK_DOUBLE},
58
    {"DROP",          TK_DROP},
59
    {"EXISTS",        TK_EXISTS},
60
    {"EXPLAIN",       TK_EXPLAIN},
X
Xiaoyu Wang 已提交
61
    {"FILE_FACTOR",   TK_FILE_FACTOR},
62 63 64 65
    {"FILL",          TK_FILL},
    {"FLOAT",         TK_FLOAT},
    {"FROM",          TK_FROM},
    {"FSYNC",         TK_FSYNC},
X
Xiaoyu Wang 已提交
66
    {"FUNCTION",      TK_FUNCTION},
X
Xiaoyu Wang 已提交
67
    {"FUNCTIONS",     TK_FUNCTIONS},
68 69 70 71 72
    {"GROUP",         TK_GROUP},
    {"HAVING",        TK_HAVING},
    {"IF",            TK_IF},
    {"IMPORT",        TK_IMPORT},
    {"IN",            TK_IN},
X
Xiaoyu Wang 已提交
73
    {"INDEX",         TK_INDEX},
X
Xiaoyu Wang 已提交
74
    {"INDEXES",       TK_INDEXES},
75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
    {"INNER",         TK_INNER},
    {"INT",           TK_INT},
    {"INSERT",        TK_INSERT},
    {"INTEGER",       TK_INTEGER},
    {"INTERVAL",      TK_INTERVAL},
    {"INTO",          TK_INTO},
    {"IS",            TK_IS},
    {"JOIN",          TK_JOIN},
    {"JSON",          TK_JSON},
    {"KEEP",          TK_KEEP},
    {"LIKE",          TK_LIKE},
    {"LIMIT",         TK_LIMIT},
    {"LINEAR",        TK_LINEAR},
    {"MATCH",         TK_MATCH},
    {"MAXROWS",       TK_MAXROWS},
    {"MINROWS",       TK_MINROWS},
    {"MINUS",         TK_MINUS},
X
Xiaoyu Wang 已提交
92
    {"MNODES",        TK_MNODES},
X
Xiaoyu Wang 已提交
93
    {"MODULES",       TK_MODULES},
94 95 96 97 98 99 100 101 102 103
    {"NCHAR",         TK_NCHAR},
    {"NMATCH",        TK_NMATCH},
    {"NONE",          TK_NONE},
    {"NOT",           TK_NOT},
    {"NOW",           TK_NOW},
    {"NULL",          TK_NULL},
    {"OFFSET",        TK_OFFSET},
    {"ON",            TK_ON},
    {"OR",            TK_OR},
    {"ORDER",         TK_ORDER},
104
    {"PARTITION",     TK_PARTITION},
105 106 107 108 109
    {"PASS",          TK_PASS},
    {"PORT",          TK_PORT},
    {"PRECISION",     TK_PRECISION},
    {"PRIVILEGE",     TK_PRIVILEGE},
    {"PREV",          TK_PREV},
110
    {"QENDTS",        TK_QENDTS},
X
Xiaoyu Wang 已提交
111
    {"QNODE",         TK_QNODE},
X
Xiaoyu Wang 已提交
112
    {"QNODES",        TK_QNODES},
113
    {"QSTARTTS",      TK_QSTARTTS},
114
    {"QUORUM",        TK_QUORUM},
115
    {"RATIO",         TK_RATIO},
116
    {"REPLICA",       TK_REPLICA},
X
Xiaoyu Wang 已提交
117 118
    {"RETENTIONS",    TK_RETENTIONS},
    {"ROLLUP",        TK_ROLLUP},
119
    {"ROWTS",         TK_ROWTS},
120 121 122 123 124 125 126 127 128
    {"SELECT",        TK_SELECT},
    {"SESSION",       TK_SESSION},
    {"SHOW",          TK_SHOW},
    {"SINGLE_STABLE", TK_SINGLE_STABLE},
    {"SLIDING",       TK_SLIDING},
    {"SLIMIT",        TK_SLIMIT},
    {"SMA",           TK_SMA},
    {"SMALLINT",      TK_SMALLINT},
    {"SOFFSET",       TK_SOFFSET},
129
    {"STABLE",        TK_STABLE},
130
    {"STABLES",       TK_STABLES},
131
    {"STATE_WINDOW",  TK_STATE_WINDOW},
X
Xiaoyu Wang 已提交
132
    {"STREAMS",       TK_STREAMS},
133 134 135 136
    {"STREAM_MODE",   TK_STREAM_MODE},
    {"TABLE",         TK_TABLE},
    {"TABLES",        TK_TABLES},
    {"TAGS",          TK_TAGS},
137
    {"TBNAME",        TK_TBNAME},
138 139
    {"TIMESTAMP",     TK_TIMESTAMP},
    {"TINYINT",       TK_TINYINT},
140
    {"TOPIC",         TK_TOPIC},
141 142 143 144
    {"TTL",           TK_TTL},
    {"UNION",         TK_UNION},
    {"UNSIGNED",      TK_UNSIGNED},
    {"USE",           TK_USE},
145 146
    {"USER",          TK_USER},
    {"USERS",         TK_USERS},
147 148 149
    {"USING",         TK_USING},
    {"VALUES",        TK_VALUES},
    {"VARCHAR",       TK_VARCHAR},
150
    {"VERBOSE",       TK_VERBOSE},
151 152
    {"VGROUPS",       TK_VGROUPS},
    {"WAL",           TK_WAL},
153 154
    {"WDURATION",     TK_WDURATION},
    {"WENDTS",        TK_WENDTS},
155
    {"WHERE",         TK_WHERE},
156
    {"WSTARTTS",      TK_WSTARTTS},
157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247
    // {"ID",           TK_ID},
    // {"STRING",       TK_STRING},
    // {"EQ",           TK_EQ},
    // {"NE",           TK_NE},
    // {"ISNULL",       TK_ISNULL},
    // {"NOTNULL",      TK_NOTNULL},
    // {"GLOB",         TK_GLOB},
    // {"GT",           TK_GT},
    // {"GE",           TK_GE},
    // {"LT",           TK_LT},
    // {"LE",           TK_LE},
    // {"BITAND",       TK_BITAND},
    // {"BITOR",        TK_BITOR},
    // {"LSHIFT",       TK_LSHIFT},
    // {"RSHIFT",       TK_RSHIFT},
    // {"PLUS",         TK_PLUS},
    // {"DIVIDE",       TK_DIVIDE},
    // {"TIMES",        TK_TIMES},
    // {"STAR",         TK_STAR},
    // {"SLASH",        TK_SLASH},
    // {"REM ",         TK_REM},
    // {"||",           TK_CONCAT},
    // {"UMINUS",       TK_UMINUS},
    // {"UPLUS",        TK_UPLUS},
    // {"BITNOT",       TK_BITNOT},
    // {"ACCOUNTS",     TK_ACCOUNTS},
    // {"QUERIES",      TK_QUERIES},
    // {"CONNECTIONS",  TK_CONNECTIONS},
    // {"VARIABLES",    TK_VARIABLES},
    // {"SCORES",       TK_SCORES},
    // {"GRANTS",       TK_GRANTS},
    // {"DOT",          TK_DOT},
    // {"DESCRIBE",     TK_DESCRIBE},
    // {"SYNCDB",       TK_SYNCDB},
    // {"LOCAL",        TK_LOCAL},
    // {"PPS",          TK_PPS},
    // {"TSERIES",      TK_TSERIES},
    // {"DBS",          TK_DBS},
    // {"STORAGE",      TK_STORAGE},
    // {"QTIME",        TK_QTIME},
    // {"CONNS",        TK_CONNS},
    // {"STATE",        TK_STATE},
    // {"CTIME",        TK_CTIME},
    // {"LP",           TK_LP},
    // {"RP",           TK_RP},
    // {"COMMA",        TK_COMMA},
    // {"EVERY",        TK_EVERY},
    // {"VARIABLE",     TK_VARIABLE},
    // {"UPDATE",       TK_UPDATE},
    // {"RESET",        TK_RESET},
    // {"QUERY",        TK_QUERY},
    // {"ADD",          TK_ADD},
    // {"COLUMN",       TK_COLUMN},
    // {"TAG",          TK_TAG},
    // {"CHANGE",       TK_CHANGE},
    // {"SET",          TK_SET},
    // {"KILL",         TK_KILL},
    // {"CONNECTION",   TK_CONNECTION},
    // {"COLON",        TK_COLON},
    // {"STREAM",       TK_STREAM},
    // {"ABORT",        TK_ABORT},
    // {"AFTER",        TK_AFTER},
    // {"ATTACH",       TK_ATTACH},
    // {"BEFORE",       TK_BEFORE},
    // {"BEGIN",        TK_BEGIN},
    // {"CASCADE",      TK_CASCADE},
    // {"CLUSTER",      TK_CLUSTER},
    // {"CONFLICT",     TK_CONFLICT},
    // {"COPY",         TK_COPY},
    // {"DEFERRED",     TK_DEFERRED},
    // {"DELIMITERS",   TK_DELIMITERS},
    // {"DETACH",       TK_DETACH},
    // {"EACH",         TK_EACH},
    // {"END",          TK_END},
    // {"FAIL",         TK_FAIL},
    // {"FOR",          TK_FOR},
    // {"IGNORE",       TK_IGNORE},
    // {"IMMEDIATE",    TK_IMMEDIATE},
    // {"INITIALLY",    TK_INITIALLY},
    // {"INSTEAD",      TK_INSTEAD},
    // {"KEY",          TK_KEY},
    // {"OF",           TK_OF},
    // {"RAISE",        TK_RAISE},
    // {"REPLACE",      TK_REPLACE},
    // {"RESTRICT",     TK_RESTRICT},
    // {"ROW",          TK_ROW},
    // {"STATEMENT",    TK_STATEMENT},
    // {"TRIGGER",      TK_TRIGGER},
    // {"VIEW",         TK_VIEW},
    // {"SEMI",         TK_SEMI},
    // {"VNODES",       TK_VNODES},
248
//    {"PARTITIONS",   TK_PARTITIONS},
249 250 251 252 253 254 255
    // {"TOPICS",       TK_TOPICS},
    // {"COMPACT",      TK_COMPACT},
    // {"MODIFY",       TK_MODIFY},
    // {"OUTPUTTYPE",   TK_OUTPUTTYPE},
    // {"AGGREGATE",    TK_AGGREGATE},
    // {"BUFSIZE",      TK_BUFSIZE},
    // {"MODE",         TK_MODE},
H
hzcheng 已提交
256 257 258 259 260 261 262 263 264 265 266 267 268 269
};

/*
 * Identifier-character lookup, indexed by 7-bit ASCII code.
 * An entry is 1 for '0'-'9', 'A'-'Z', 'a'-'z' and '_', and 0 otherwise.
 * The table holds 128 entries only, so a byte with the high bit set must be
 * rejected by the caller before it is used as an index.
 */
static const char isIdChar[] = {
    /* 0x00 - 0x0F: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 - 0x1F: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 - 0x2F: space and punctuation */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x30 - 0x3F: digits '0'-'9', then punctuation */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
    /* 0x40 - 0x4F: '@', then 'A'-'O' */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 - 0x5F: 'P'-'Z', punctuation, then '_' */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
    /* 0x60 - 0x6F: '`', then 'a'-'o' */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 - 0x7F: 'p'-'z', punctuation, DEL */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
};

H
Haojun Liao 已提交
270
static void* keywordHashTable = NULL;
H
hzcheng 已提交
271

S
TD-1057  
Shengliang Guan 已提交
272
static void doInitKeywordsTable(void) {
273 274
  int numOfEntries = tListLen(keywordTable);
  
H
Haojun Liao 已提交
275
  keywordHashTable = taosHashInit(numOfEntries, MurmurHash3_32, true, false);
276
  for (int32_t i = 0; i < numOfEntries; i++) {
S
TD-1057  
Shengliang Guan 已提交
277
    keywordTable[i].len = (uint8_t)strlen(keywordTable[i].name);
278
    void* ptr = &keywordTable[i];
H
Haojun Liao 已提交
279
    taosHashPut(keywordHashTable, keywordTable[i].name, keywordTable[i].len, (void*)&ptr, POINTER_BYTES);
H
hzcheng 已提交
280
  }
281 282
}

wafwerar's avatar
wafwerar 已提交
283
static TdThreadOnce keywordsHashTableInit = PTHREAD_ONCE_INIT;
284

285
static int32_t tKeywordCode(const char* z, int n) {
wafwerar's avatar
wafwerar 已提交
286
  taosThreadOnce(&keywordsHashTableInit, doInitKeywordsTable);
H
hjxilinx 已提交
287 288
  
  char key[512] = {0};
289
  if (n > tListLen(key)) { // too long token, can not be any other token type
290
    return TK_NK_ID;
291
  }
H
hjxilinx 已提交
292
  
H
hzcheng 已提交
293 294
  for (int32_t j = 0; j < n; ++j) {
    if (z[j] >= 'a' && z[j] <= 'z') {
295
      key[j] = (char)(z[j] & 0xDF);  // to uppercase and set the null-terminated
H
hzcheng 已提交
296 297 298 299 300
    } else {
      key[j] = z[j];
    }
  }

D
dapan1121 已提交
301
  if (keywordHashTable == NULL) {
302
    return TK_NK_ILLEGAL;
D
dapan1121 已提交
303
  }
H
Haojun Liao 已提交
304

H
Haojun Liao 已提交
305
  SKeyword** pKey = (SKeyword**)taosHashGet(keywordHashTable, key, n);
306
  return (pKey != NULL)? (*pKey)->type:TK_NK_ID;
H
hzcheng 已提交
307 308
}

H
huili 已提交
309
/*
310 311 312
 * Return the length of the token that begins at z[0].
 * Store the token type in *type before returning.
 */
313
uint32_t tGetToken(const char* z, uint32_t* tokenId) {
314
  uint32_t i;
H
hzcheng 已提交
315 316 317 318 319 320 321 322
  switch (*z) {
    case ' ':
    case '\t':
    case '\n':
    case '\f':
    case '\r': {
      for (i = 1; isspace(z[i]); i++) {
      }
323
      *tokenId = TK_NK_SPACE;
H
hzcheng 已提交
324 325 326
      return i;
    }
    case ':': {
327
      *tokenId = TK_NK_COLON;
H
hzcheng 已提交
328 329 330 331 332 333
      return 1;
    }
    case '-': {
      if (z[1] == '-') {
        for (i = 2; z[i] && z[i] != '\n'; i++) {
        }
334
        *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
335 336
        return i;
      }
X
Xiaoyu Wang 已提交
337
      *tokenId = TK_NK_MINUS;
H
hzcheng 已提交
338 339 340
      return 1;
    }
    case '(': {
341
      *tokenId = TK_NK_LP;
H
hzcheng 已提交
342 343 344
      return 1;
    }
    case ')': {
345
      *tokenId = TK_NK_RP;
H
hzcheng 已提交
346 347 348
      return 1;
    }
    case ';': {
349
      *tokenId = TK_NK_SEMI;
H
hzcheng 已提交
350 351 352
      return 1;
    }
    case '+': {
353
      *tokenId = TK_NK_PLUS;
H
hzcheng 已提交
354 355 356
      return 1;
    }
    case '*': {
357
      *tokenId = TK_NK_STAR;
H
hzcheng 已提交
358 359 360 361
      return 1;
    }
    case '/': {
      if (z[1] != '*' || z[2] == 0) {
362
        *tokenId = TK_NK_SLASH;
H
hzcheng 已提交
363 364 365 366 367
        return 1;
      }
      for (i = 3; z[i] && (z[i] != '/' || z[i - 1] != '*'); i++) {
      }
      if (z[i]) i++;
368
      *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
369 370 371
      return i;
    }
    case '%': {
372
      *tokenId = TK_NK_REM;
H
hzcheng 已提交
373 374 375
      return 1;
    }
    case '=': {
376
      *tokenId = TK_NK_EQ;
H
hzcheng 已提交
377 378 379 380
      return 1 + (z[1] == '=');
    }
    case '<': {
      if (z[1] == '=') {
381
        *tokenId = TK_NK_LE;
H
hzcheng 已提交
382 383
        return 2;
      } else if (z[1] == '>') {
384
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
385 386
        return 2;
      } else if (z[1] == '<') {
387
        *tokenId = TK_NK_LSHIFT;
H
hzcheng 已提交
388 389
        return 2;
      } else {
390
        *tokenId = TK_NK_LT;
H
hzcheng 已提交
391 392 393 394 395
        return 1;
      }
    }
    case '>': {
      if (z[1] == '=') {
396
        *tokenId = TK_NK_GE;
H
hzcheng 已提交
397 398
        return 2;
      } else if (z[1] == '>') {
399
        *tokenId = TK_NK_RSHIFT;
H
hzcheng 已提交
400 401
        return 2;
      } else {
402
        *tokenId = TK_NK_GT;
H
hzcheng 已提交
403 404 405 406 407
        return 1;
      }
    }
    case '!': {
      if (z[1] != '=') {
408
        *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
409 410
        return 2;
      } else {
411
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
412 413 414 415 416
        return 2;
      }
    }
    case '|': {
      if (z[1] != '|') {
417
        *tokenId = TK_NK_BITOR;
H
hzcheng 已提交
418 419
        return 1;
      } else {
420
        *tokenId = TK_NK_CONCAT;
H
hzcheng 已提交
421 422 423 424
        return 2;
      }
    }
    case ',': {
425
      *tokenId = TK_NK_COMMA;
H
hzcheng 已提交
426 427 428
      return 1;
    }
    case '&': {
429
      *tokenId = TK_NK_BITAND;
H
hzcheng 已提交
430 431 432
      return 1;
    }
    case '~': {
433
      *tokenId = TK_NK_BITNOT;
H
hzcheng 已提交
434 435
      return 1;
    }
S
slguan 已提交
436
    case '?': {
437
      *tokenId = TK_NK_QUESTION;
S
slguan 已提交
438 439
      return 1;
    }
440 441 442 443
    case '_': {
      *tokenId = TK_NK_UNDERLINE;
      return 1;
    }
444
    case '`':
H
hzcheng 已提交
445 446
    case '\'':
    case '"': {
S
slguan 已提交
447 448
      int  delim = z[0];
      bool strEnd = false;
H
hzcheng 已提交
449
      for (i = 1; z[i]; i++) {
H
Haojun Liao 已提交
450
        if (z[i] == '\\') {   // ignore the escaped character that follows this backslash
L
[1292]  
lihui 已提交
451 452 453 454
          i++;
          continue;
        }
        
455
        if (z[i] == delim) {
H
hzcheng 已提交
456 457 458
          if (z[i + 1] == delim) {
            i++;
          } else {
H
huili 已提交
459
            strEnd = true;
H
hzcheng 已提交
460 461 462 463
            break;
          }
        }
      }
L
[1292]  
lihui 已提交
464
      
H
hzcheng 已提交
465
      if (z[i]) i++;
H
huili 已提交
466

S
slguan 已提交
467
      if (strEnd) {
468
        *tokenId = (delim == '`')? TK_NK_ID:TK_NK_STRING;
S
slguan 已提交
469 470
        return i;
      }
H
huili 已提交
471

S
slguan 已提交
472
      break;
H
hzcheng 已提交
473 474
    }
    case '.': {
S
slguan 已提交
475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491
      /*
       * handle the the float number with out integer part
       * .123
       * .123e4
       */
      if (isdigit(z[1])) {
        for (i = 2; isdigit(z[i]); i++) {
        }

        if ((z[i] == 'e' || z[i] == 'E') &&
            (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
          i += 2;
          while (isdigit(z[i])) {
            i++;
          }
        }

492
        *tokenId = TK_NK_FLOAT;
S
slguan 已提交
493 494
        return i;
      } else {
495
        *tokenId = TK_NK_DOT;
S
slguan 已提交
496 497 498 499 500 501 502 503
        return 1;
      }
    }

    case '0': {
      char next = z[1];

      if (next == 'b') { // bin number
504
        *tokenId = TK_NK_BIN;
S
slguan 已提交
505 506 507 508 509 510 511 512 513
        for (i = 2; (z[i] == '0' || z[i] == '1'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
      } else if (next == 'x') {  //hex number
514
        *tokenId = TK_NK_HEX;
S
slguan 已提交
515 516 517 518 519 520 521 522 523
        for (i = 2; isdigit(z[i]) || (z[i] >= 'a' && z[i] <= 'f') || (z[i] >= 'A' && z[i] <= 'F'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
      }
H
hzcheng 已提交
524 525 526 527 528 529 530 531 532 533
    }
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9': {
534
      *tokenId = TK_NK_INTEGER;
H
hzcheng 已提交
535 536 537
      for (i = 1; isdigit(z[i]); i++) {
      }

H
Haojun Liao 已提交
538
      /* here is the 1u/1a/2s/3m/9y */
539
      if ((z[i] == 'b' || z[i] == 'u' || z[i] == 'a' || z[i] == 's' || z[i] == 'm' || z[i] == 'h' || z[i] == 'd' || z[i] == 'n' ||
H
Haojun Liao 已提交
540
           z[i] == 'y' || z[i] == 'w' ||
541
           z[i] == 'B' || z[i] == 'U' || z[i] == 'A' || z[i] == 'S' || z[i] == 'M' || z[i] == 'H' || z[i] == 'D' || z[i] == 'N' ||
H
Haojun Liao 已提交
542
           z[i] == 'Y' || z[i] == 'W') &&
H
hjxilinx 已提交
543
          (isIdChar[(uint8_t)z[i + 1]] == 0)) {
544
        *tokenId = TK_NK_VARIABLE;
H
hzcheng 已提交
545 546 547 548 549 550 551 552 553 554
        i += 1;
        return i;
      }

      int32_t seg = 1;
      while (z[i] == '.' && isdigit(z[i + 1])) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
555
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
556 557 558 559
        seg++;
      }

      if (seg == 4) {  // ip address
560
        *tokenId = TK_NK_IPTOKEN;
H
hzcheng 已提交
561 562 563 564 565 566 567 568 569
        return i;
      }

      if ((z[i] == 'e' || z[i] == 'E') &&
          (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
570
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
571 572 573 574 575 576
      }
      return i;
    }
    case '[': {
      for (i = 1; z[i] && z[i - 1] != ']'; i++) {
      }
577
      *tokenId = TK_NK_ID;
H
hzcheng 已提交
578 579 580 581 582 583
      return i;
    }
    case 'T':
    case 't':
    case 'F':
    case 'f': {
H
hjxilinx 已提交
584
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t) z[i]]; i++) {
H
hzcheng 已提交
585 586 587
      }

      if ((i == 4 && strncasecmp(z, "true", 4) == 0) || (i == 5 && strncasecmp(z, "false", 5) == 0)) {
588
        *tokenId = TK_NK_BOOL;
H
hzcheng 已提交
589 590 591 592
        return i;
      }
    }
    default: {
H
hjxilinx 已提交
593
      if (((*z & 0x80) != 0) || !isIdChar[(uint8_t) *z]) {
H
hzcheng 已提交
594 595
        break;
      }
H
hjxilinx 已提交
596
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t) z[i]]; i++) {
H
hzcheng 已提交
597
      }
598
      *tokenId = tKeywordCode(z, i);
H
hzcheng 已提交
599 600 601 602
      return i;
    }
  }

603
  *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
604 605 606
  return 0;
}

H
Haojun Liao 已提交
607
/*
 * Replace the text covered by `token` inside the string *str with newToken.
 *
 * str       in/out: the string to edit. On success a newly allocated string
 *           is stored in *str and the previous buffer is freed.
 * token     the token to replace; token->z must point into *str.
 * newToken  NUL-terminated replacement text.
 *
 * Returns a token whose z/n describe the replacement text inside the new
 * string; its other fields are zero. If the new buffer cannot be allocated,
 * *str is left unchanged and a zeroed token (n == 0, z == NULL) is returned.
 */
SToken tscReplaceStrToken(char **str, SToken *token, const char* newToken) {
  SToken ntoken = {0};
  char  *src = *str;

  const char *tail = token->z + token->n;        // text that follows the token
  size_t      nsize = strlen(newToken);
  size_t      tsize = strlen(tail);
  int32_t     bsize = (int32_t)(token->z - src);  // bytes that precede the token
  int32_t     size = (int32_t)(bsize + nsize + tsize + 1);

  char *dst = taosMemoryCalloc(1, size);
  if (dst == NULL) {
    return ntoken;
  }

  // The buffer is zero-filled, so the terminating NUL is already in place.
  memcpy(dst, src, bsize);
  memcpy(dst + bsize, newToken, nsize);
  memcpy(dst + bsize + nsize, tail, tsize);

  ntoken.n = (uint32_t)nsize;
  ntoken.z = dst + bsize;

  *str = dst;
  taosMemoryFreeClear(src);

  return ntoken;
}

628
SToken tStrGetToken(const char* str, int32_t* i, bool isPrevOptr) {
H
Haojun Liao 已提交
629
  SToken t0 = {0};
S
slguan 已提交
630

H
hzcheng 已提交
631 632
  // here we reach the end of sql string, null-terminated string
  if (str[*i] == 0) {
S
slguan 已提交
633 634
    t0.n = 0;
    return t0;
H
hzcheng 已提交
635 636
  }

637
  // IGNORE TK_NK_SPACE, TK_NK_COMMA, and specified tokens
S
slguan 已提交
638 639 640
  while (1) {
    *i += t0.n;

H
Haojun Liao 已提交
641 642 643 644 645 646
    int32_t numOfComma = 0;
    char t = str[*i];
    while (t == ' ' || t == '\n' || t == '\r' || t == '\t' || t == '\f' || t == ',') {
      if (t == ',' && (++numOfComma > 1)) {  // comma only allowed once
        t0.n = 0;
        return t0;
S
slguan 已提交
647
      }
H
Haojun Liao 已提交
648
    
H
Haojun Liao 已提交
649
      t = str[++(*i)];
S
slguan 已提交
650
    }
H
hzcheng 已提交
651

652
    t0.n = tGetToken(&str[*i], &t0.type);
H
Haojun Liao 已提交
653
    break;
S
slguan 已提交
654

H
Haojun Liao 已提交
655 656
    // not support user specfied ignored symbol list
#if 0
H
Haojun Liao 已提交
657
    bool ignore = false;
S
slguan 已提交
658 659
    for (uint32_t k = 0; k < numOfIgnoreToken; k++) {
      if (t0.type == ignoreTokenTypes[k]) {
H
Haojun Liao 已提交
660
        ignore = true;
S
slguan 已提交
661 662 663 664
        break;
      }
    }

H
Haojun Liao 已提交
665
    if (!ignore) {
S
slguan 已提交
666 667
      break;
    }
H
Haojun Liao 已提交
668
#endif
H
hzcheng 已提交
669 670
  }

671
  if (t0.type == TK_NK_SEMI) {
S
slguan 已提交
672 673 674 675 676 677 678 679 680
    t0.n = 0;
    return t0;
  }

  uint32_t type = 0;
  int32_t  len;

  // support parse the 'db.tbl' format, notes: There should be no space on either side of the dot!
  if ('.' == str[*i + t0.n]) {
681
    len = tGetToken(&str[*i + t0.n + 1], &type);
S
slguan 已提交
682 683

    // only id and string are valid
684
    if ((TK_NK_STRING != t0.type) && (TK_NK_ID != t0.type)) {
685
      t0.type = TK_NK_ILLEGAL;
S
slguan 已提交
686 687 688 689 690 691 692 693 694
      t0.n = 0;

      return t0;
    }

    t0.n += len + 1;

  } else {
    // support parse the -/+number format
X
Xiaoyu Wang 已提交
695
    if ((isPrevOptr) && (t0.type == TK_NK_MINUS || t0.type == TK_NK_PLUS)) {
696
      len = tGetToken(&str[*i + t0.n], &type);
697
      if (type == TK_NK_INTEGER || type == TK_NK_FLOAT) {
S
slguan 已提交
698 699 700
        t0.type = type;
        t0.n += len;
      }
H
hzcheng 已提交
701 702 703
    }
  }

704
  t0.z = (char*) str + (*i);
S
slguan 已提交
705 706 707
  *i += t0.n;

  return t0;
H
hzcheng 已提交
708 709
}

710
bool taosIsKeyWordToken(const char* z, int32_t len) {
711
  return (tKeywordCode((char*)z, len) != TK_NK_ID);
712
}
H
Haojun Liao 已提交
713 714

/*
 * Release the keyword hash table, if one exists.
 *
 * The global pointer is swapped to NULL with a compare-and-exchange, and the
 * table is destroyed only by the caller whose exchange succeeded.
 */
void taosCleanupKeywordsTable(void) {
  void* m = keywordHashTable;
  if (m == NULL) {
    return;
  }

  if (atomic_val_compare_exchange_ptr(&keywordHashTable, m, NULL) == m) {
    taosHashCleanup(m);
  }
}
720

H
Haojun Liao 已提交
721
/*
 * Copy the text of *pToken into buf and return a token that refers to the copy.
 *
 * pToken  token to duplicate; must not be NULL.
 * buf     destination buffer; must not be NULL.
 * len     size of buf in bytes; must be greater than pToken->n so that the
 *         terminating NUL fits (checked by assert only).
 *
 * The returned token is a copy of *pToken whose z points at buf; buf is
 * NUL-terminated after pToken->n bytes.
 */
SToken taosTokenDup(SToken* pToken, char* buf, int32_t len) {
  assert(pToken != NULL && buf != NULL && len > pToken->n);

  // strncpy does not terminate when the source fills the count, so the
  // terminator is written explicitly.
  strncpy(buf, pToken->z, pToken->n);
  buf[pToken->n] = 0;

  SToken token = *pToken;
  token.z = buf;
  return token;
}