parTokenizer.c 20.6 KB
Newer Older
H
hzcheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

H
hjxilinx 已提交
16
#include "os.h"
X
Xiaoyu Wang 已提交
17
#include "parToken.h"
H
hjxilinx 已提交
18
#include "taosdef.h"
X
Xiaoyu Wang 已提交
19
#include "thash.h"
20
#include "ttokendef.h"
H
hzcheng 已提交
21

S
slguan 已提交
22
// All the keywords of the SQL language are stored in a hash table
H
hzcheng 已提交
23
/*
 * One entry of the SQL keyword table.
 *
 * name  keyword text exactly as written in keywordTable (upper case)
 * type  token code (TK_*) reported for this keyword
 * len   length of name in bytes; left zero by the static initializers and
 *       filled in by doInitKeywordsTable()
 */
typedef struct SKeyword {
  const char* name;  // The keyword name
  uint16_t    type;  // token code
  uint8_t     len;   // length of name, computed at initialization time
} SKeyword;

X
Xiaoyu Wang 已提交
29
// clang-format off
S
slguan 已提交
30
// keywords in sql string
H
hzcheng 已提交
31
static SKeyword keywordTable[] = {
X
Xiaoyu Wang 已提交
32 33 34 35 36 37 38 39 40 41 42 43
    {"ACCOUNT",       TK_ACCOUNT},
    {"ACCOUNTS",      TK_ACCOUNTS},
    {"ADD",           TK_ADD},
    {"AGGREGATE",     TK_AGGREGATE},
    {"ALL",           TK_ALL},
    {"ALTER",         TK_ALTER},
    {"ANALYZE",       TK_ANALYZE},
    {"AND",           TK_AND},
    {"APPS",          TK_APPS},
    {"AS",            TK_AS},
    {"ASC",           TK_ASC},
    {"AT_ONCE",       TK_AT_ONCE},
X
Xiaoyu Wang 已提交
44
    {"BALANCE",       TK_BALANCE},
X
Xiaoyu Wang 已提交
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
    {"BETWEEN",       TK_BETWEEN},
    {"BINARY",        TK_BINARY},
    {"BIGINT",        TK_BIGINT},
    {"BNODE",         TK_BNODE},
    {"BNODES",        TK_BNODES},
    {"BOOL",          TK_BOOL},
    {"BUFFER",        TK_BUFFER},
    {"BUFSIZE",       TK_BUFSIZE},
    {"BY",            TK_BY},
    {"CACHE",         TK_CACHE},
    {"CACHELAST",     TK_CACHELAST},
    {"CAST",          TK_CAST},
    {"CLUSTER",       TK_CLUSTER},
    {"COLUMN",        TK_COLUMN},
    {"COMMENT",       TK_COMMENT},
    {"COMP",          TK_COMP},
    {"COMPACT",       TK_COMPACT},
    {"CONNS",         TK_CONNS},
    {"CONNECTION",    TK_CONNECTION},
    {"CONNECTIONS",   TK_CONNECTIONS},
65
    {"CONSUMER",      TK_CONSUMER},
X
Xiaoyu Wang 已提交
66 67
    {"COUNT",         TK_COUNT},
    {"CREATE",        TK_CREATE},
dengyihao's avatar
dengyihao 已提交
68
    {"CONTAINS",      TK_CONTAINS}, 
X
Xiaoyu Wang 已提交
69 70 71 72
    {"DATABASE",      TK_DATABASE},
    {"DATABASES",     TK_DATABASES},
    {"DAYS",          TK_DAYS},
    {"DBS",           TK_DBS},
X
Xiaoyu Wang 已提交
73
    {"DELETE",        TK_DELETE},
X
Xiaoyu Wang 已提交
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
    {"DESC",          TK_DESC},
    {"DESCRIBE",      TK_DESCRIBE},
    {"DISTINCT",      TK_DISTINCT},
    {"DNODE",         TK_DNODE},
    {"DNODES",        TK_DNODES},
    {"DOUBLE",        TK_DOUBLE},
    {"DROP",          TK_DROP},
    {"EXISTS",        TK_EXISTS},
    {"EXPLAIN",       TK_EXPLAIN},
    {"FILE_FACTOR",   TK_FILE_FACTOR},
    {"FILL",          TK_FILL},
    {"FIRST",         TK_FIRST},
    {"FLOAT",         TK_FLOAT},
    {"FROM",          TK_FROM},
    {"FSYNC",         TK_FSYNC},
    {"FUNCTION",      TK_FUNCTION},
    {"FUNCTIONS",     TK_FUNCTIONS},
91
    {"GRANT",         TK_GRANT},
X
Xiaoyu Wang 已提交
92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
    {"GRANTS",        TK_GRANTS},
    {"GROUP",         TK_GROUP},
    {"HAVING",        TK_HAVING},
    {"IF",            TK_IF},
    {"IMPORT",        TK_IMPORT},
    {"IN",            TK_IN},
    {"INDEX",         TK_INDEX},
    {"INDEXES",       TK_INDEXES},
    {"INNER",         TK_INNER},
    {"INT",           TK_INT},
    {"INSERT",        TK_INSERT},
    {"INTEGER",       TK_INTEGER},
    {"INTERVAL",      TK_INTERVAL},
    {"INTO",          TK_INTO},
    {"IS",            TK_IS},
    {"JOIN",          TK_JOIN},
    {"JSON",          TK_JSON},
    {"KEEP",          TK_KEEP},
    {"KILL",          TK_KILL},
    {"LAST",          TK_LAST},
    {"LAST_ROW",      TK_LAST_ROW},
    {"LICENCE",       TK_LICENCE},
    {"LIKE",          TK_LIKE},
    {"LIMIT",         TK_LIMIT},
    {"LINEAR",        TK_LINEAR},
    {"LOCAL",         TK_LOCAL},
    {"MATCH",         TK_MATCH},
    {"MAXROWS",       TK_MAXROWS},
120
    {"MAX_DELAY",     TK_MAX_DELAY},
X
Xiaoyu Wang 已提交
121
    {"MERGE",         TK_MERGE},
X
Xiaoyu Wang 已提交
122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
    {"MINROWS",       TK_MINROWS},
    {"MINUS",         TK_MINUS},
    {"MNODE",         TK_MNODE},
    {"MNODES",        TK_MNODES},
    {"MODIFY",        TK_MODIFY},
    {"MODULES",       TK_MODULES},
    {"NCHAR",         TK_NCHAR},
    {"NEXT",          TK_NEXT},
    {"NMATCH",        TK_NMATCH},
    {"NONE",          TK_NONE},
    {"NOT",           TK_NOT},
    {"NOW",           TK_NOW},
    {"NULL",          TK_NULL},
    {"NULLS",         TK_NULLS},
    {"OFFSET",        TK_OFFSET},
    {"ON",            TK_ON},
    {"OR",            TK_OR},
    {"ORDER",         TK_ORDER},
    {"OUTPUTTYPE",    TK_OUTPUTTYPE},
    {"PARTITION",     TK_PARTITION},
    {"PASS",          TK_PASS},
    {"PAGES",         TK_PAGES},
    {"PAGESIZE",      TK_PAGESIZE},
    {"PORT",          TK_PORT},
    {"PPS",           TK_PPS},
    {"PRECISION",     TK_PRECISION},
    {"PRIVILEGE",     TK_PRIVILEGE},
    {"PREV",          TK_PREV},
    {"QNODE",         TK_QNODE},
    {"QNODES",        TK_QNODES},
    {"QTIME",         TK_QTIME},
    {"QUERIES",       TK_QUERIES},
    {"QUERY",         TK_QUERY},
    {"RATIO",         TK_RATIO},
156
    {"READ",          TK_READ},
X
Xiaoyu Wang 已提交
157
    {"REDISTRIBUTE",  TK_REDISTRIBUTE},
158
    {"RENAME",        TK_RENAME},
X
Xiaoyu Wang 已提交
159 160 161
    {"REPLICA",       TK_REPLICA},
    {"RESET",         TK_RESET},
    {"RETENTIONS",    TK_RETENTIONS},
162
    {"REVOKE",        TK_REVOKE},
X
Xiaoyu Wang 已提交
163
    {"ROLLUP",        TK_ROLLUP},
X
Xiaoyu Wang 已提交
164
    {"SCHEMALESS",    TK_SCHEMALESS},
X
Xiaoyu Wang 已提交
165 166 167 168 169
    {"SCORES",        TK_SCORES},
    {"SELECT",        TK_SELECT},
    {"SESSION",       TK_SESSION},
    {"SET",           TK_SET},
    {"SHOW",          TK_SHOW},
170
    {"SINGLE_STABLE", TK_SINGLE_STABLE},
X
Xiaoyu Wang 已提交
171 172 173 174 175 176 177
    {"SLIDING",       TK_SLIDING},
    {"SLIMIT",        TK_SLIMIT},
    {"SMA",           TK_SMA},
    {"SMALLINT",      TK_SMALLINT},
    {"SNODE",         TK_SNODE},
    {"SNODES",        TK_SNODES},
    {"SOFFSET",       TK_SOFFSET},
178
    {"SPLIT",         TK_SPLIT},
X
Xiaoyu Wang 已提交
179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195
    {"STABLE",        TK_STABLE},
    {"STABLES",       TK_STABLES},
    {"STATE",         TK_STATE},
    {"STATE_WINDOW",  TK_STATE_WINDOW},
    {"STORAGE",       TK_STORAGE},
    {"STREAM",        TK_STREAM},
    {"STREAMS",       TK_STREAMS},
    {"STRICT",        TK_STRICT},
    {"SYNCDB",        TK_SYNCDB},
    {"TABLE",         TK_TABLE},
    {"TABLES",        TK_TABLES},
    {"TAG",           TK_TAG},
    {"TAGS",          TK_TAGS},
    {"TBNAME",        TK_TBNAME},
    {"TIMESTAMP",     TK_TIMESTAMP},
    {"TIMEZONE",      TK_TIMEZONE},
    {"TINYINT",       TK_TINYINT},
196
    {"TO",            TK_TO},
X
Xiaoyu Wang 已提交
197 198 199
    {"TODAY",         TK_TODAY},
    {"TOPIC",         TK_TOPIC},
    {"TOPICS",        TK_TOPICS},
200 201
    {"TRANSACTION",   TK_TRANSACTION},
    {"TRANSACTIONS",  TK_TRANSACTIONS},
X
Xiaoyu Wang 已提交
202 203 204 205 206 207 208 209 210 211 212 213 214 215
    {"TRIGGER",       TK_TRIGGER},
    {"TSERIES",       TK_TSERIES},
    {"TTL",           TK_TTL},
    {"UNION",         TK_UNION},
    {"UNSIGNED",      TK_UNSIGNED},
    {"USE",           TK_USE},
    {"USER",          TK_USER},
    {"USERS",         TK_USERS},
    {"USING",         TK_USING},
    {"VALUE",         TK_VALUE},
    {"VALUES",        TK_VALUES},
    {"VARCHAR",       TK_VARCHAR},
    {"VARIABLES",     TK_VARIABLES},
    {"VERBOSE",       TK_VERBOSE},
X
Xiaoyu Wang 已提交
216
    {"VGROUP",        TK_VGROUP},
X
Xiaoyu Wang 已提交
217 218 219 220 221 222
    {"VGROUPS",       TK_VGROUPS},
    {"VNODES",        TK_VNODES},
    {"WAL",           TK_WAL},
    {"WATERMARK",     TK_WATERMARK},
    {"WHERE",         TK_WHERE},
    {"WINDOW_CLOSE",  TK_WINDOW_CLOSE},
223
    {"WRITE",         TK_WRITE},
X
Xiaoyu Wang 已提交
224
    {"_C0",           TK_ROWTS},
X
Xiaoyu Wang 已提交
225 226 227 228 229 230
    {"_QENDTS",       TK_QENDTS},
    {"_QSTARTTS",     TK_QSTARTTS},
    {"_ROWTS",        TK_ROWTS},
    {"_WDURATION",    TK_WDURATION},
    {"_WENDTS",       TK_WENDTS},
    {"_WSTARTTS",     TK_WSTARTTS},
231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293
    // {"ID",           TK_ID},
    // {"STRING",       TK_STRING},
    // {"EQ",           TK_EQ},
    // {"NE",           TK_NE},
    // {"ISNULL",       TK_ISNULL},
    // {"NOTNULL",      TK_NOTNULL},
    // {"GLOB",         TK_GLOB},
    // {"GT",           TK_GT},
    // {"GE",           TK_GE},
    // {"LT",           TK_LT},
    // {"LE",           TK_LE},
    // {"BITAND",       TK_BITAND},
    // {"BITOR",        TK_BITOR},
    // {"LSHIFT",       TK_LSHIFT},
    // {"RSHIFT",       TK_RSHIFT},
    // {"PLUS",         TK_PLUS},
    // {"DIVIDE",       TK_DIVIDE},
    // {"TIMES",        TK_TIMES},
    // {"STAR",         TK_STAR},
    // {"SLASH",        TK_SLASH},
    // {"REM ",         TK_REM},
    // {"||",           TK_CONCAT},
    // {"UMINUS",       TK_UMINUS},
    // {"UPLUS",        TK_UPLUS},
    // {"BITNOT",       TK_BITNOT},
    // {"DOT",          TK_DOT},
    // {"CTIME",        TK_CTIME},
    // {"LP",           TK_LP},
    // {"RP",           TK_RP},
    // {"COMMA",        TK_COMMA},
    // {"EVERY",        TK_EVERY},
    // {"VARIABLE",     TK_VARIABLE},
    // {"UPDATE",       TK_UPDATE},
    // {"CHANGE",       TK_CHANGE},
    // {"COLON",        TK_COLON},
    // {"ABORT",        TK_ABORT},
    // {"AFTER",        TK_AFTER},
    // {"ATTACH",       TK_ATTACH},
    // {"BEFORE",       TK_BEFORE},
    // {"BEGIN",        TK_BEGIN},
    // {"CASCADE",      TK_CASCADE},
    // {"CONFLICT",     TK_CONFLICT},
    // {"COPY",         TK_COPY},
    // {"DEFERRED",     TK_DEFERRED},
    // {"DELIMITERS",   TK_DELIMITERS},
    // {"DETACH",       TK_DETACH},
    // {"EACH",         TK_EACH},
    // {"END",          TK_END},
    // {"FAIL",         TK_FAIL},
    // {"FOR",          TK_FOR},
    // {"IGNORE",       TK_IGNORE},
    // {"IMMEDIATE",    TK_IMMEDIATE},
    // {"INITIALLY",    TK_INITIALLY},
    // {"INSTEAD",      TK_INSTEAD},
    // {"KEY",          TK_KEY},
    // {"OF",           TK_OF},
    // {"RAISE",        TK_RAISE},
    // {"REPLACE",      TK_REPLACE},
    // {"RESTRICT",     TK_RESTRICT},
    // {"ROW",          TK_ROW},
    // {"STATEMENT",    TK_STATEMENT},
    // {"VIEW",         TK_VIEW},
    // {"SEMI",         TK_SEMI},
294
    // {"PARTITIONS",   TK_PARTITIONS},
295
    // {"MODE",         TK_MODE},
H
hzcheng 已提交
296
};
X
Xiaoyu Wang 已提交
297
// clang-format on
H
hzcheng 已提交
298 299 300 301 302 303 304 305 306 307 308 309 310

/*
 * Identifier-character lookup table, indexed by byte value.
 *
 * An entry is 1 for the ASCII digits, the ASCII letters and '_', and 0 for
 * everything else. The array is sized to 256 so that any index produced by a
 * (uint8_t) cast stays in bounds; the 128 entries not listed below
 * (0x80..0xFF) are zero-initialized, i.e. non-ASCII bytes are never
 * identifier characters.
 */
static const char isIdChar[256] = {
    /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
};

H
Haojun Liao 已提交
311
// Hash table keyed by keyword text; each value is a pointer to the matching
// keywordTable entry. Created by doInitKeywordsTable() and released (and reset
// to NULL) by taosCleanupKeywordsTable().
static void* keywordHashTable = NULL;
H
hzcheng 已提交
312

S
TD-1057  
Shengliang Guan 已提交
313
static void doInitKeywordsTable(void) {
314
  int numOfEntries = tListLen(keywordTable);
X
Xiaoyu Wang 已提交
315

H
Haojun Liao 已提交
316
  keywordHashTable = taosHashInit(numOfEntries, MurmurHash3_32, true, false);
317
  for (int32_t i = 0; i < numOfEntries; i++) {
S
TD-1057  
Shengliang Guan 已提交
318
    keywordTable[i].len = (uint8_t)strlen(keywordTable[i].name);
319
    void* ptr = &keywordTable[i];
H
Haojun Liao 已提交
320
    taosHashPut(keywordHashTable, keywordTable[i].name, keywordTable[i].len, (void*)&ptr, POINTER_BYTES);
H
hzcheng 已提交
321
  }
322 323
}

wafwerar's avatar
wafwerar 已提交
324
// Once-control object handed to taosThreadOnce() in tKeywordCode() to run
// doInitKeywordsTable(); presumably pthread_once semantics (run a single time
// per process) -- confirm against the taosThreadOnce wrapper.
static TdThreadOnce keywordsHashTableInit = PTHREAD_ONCE_INIT;
325

326
static int32_t tKeywordCode(const char* z, int n) {
wafwerar's avatar
wafwerar 已提交
327
  taosThreadOnce(&keywordsHashTableInit, doInitKeywordsTable);
X
Xiaoyu Wang 已提交
328

H
hjxilinx 已提交
329
  char key[512] = {0};
X
Xiaoyu Wang 已提交
330
  if (n > tListLen(key)) {  // too long token, can not be any other token type
331
    return TK_NK_ID;
332
  }
X
Xiaoyu Wang 已提交
333

H
hzcheng 已提交
334 335
  for (int32_t j = 0; j < n; ++j) {
    if (z[j] >= 'a' && z[j] <= 'z') {
336
      key[j] = (char)(z[j] & 0xDF);  // to uppercase and set the null-terminated
H
hzcheng 已提交
337 338 339 340 341
    } else {
      key[j] = z[j];
    }
  }

D
dapan1121 已提交
342
  if (keywordHashTable == NULL) {
343
    return TK_NK_ILLEGAL;
D
dapan1121 已提交
344
  }
H
Haojun Liao 已提交
345

H
Haojun Liao 已提交
346
  SKeyword** pKey = (SKeyword**)taosHashGet(keywordHashTable, key, n);
X
Xiaoyu Wang 已提交
347
  return (pKey != NULL) ? (*pKey)->type : TK_NK_ID;
H
hzcheng 已提交
348 349
}

H
huili 已提交
350
/*
351 352 353
 * Return the length of the token that begins at z[0].
 * Store the token type in *type before returning.
 */
354
uint32_t tGetToken(const char* z, uint32_t* tokenId) {
355
  uint32_t i;
H
hzcheng 已提交
356 357 358 359 360 361 362 363
  switch (*z) {
    case ' ':
    case '\t':
    case '\n':
    case '\f':
    case '\r': {
      for (i = 1; isspace(z[i]); i++) {
      }
364
      *tokenId = TK_NK_SPACE;
H
hzcheng 已提交
365 366 367
      return i;
    }
    case ':': {
368
      *tokenId = TK_NK_COLON;
H
hzcheng 已提交
369 370 371 372 373 374
      return 1;
    }
    case '-': {
      if (z[1] == '-') {
        for (i = 2; z[i] && z[i] != '\n'; i++) {
        }
375
        *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
376
        return i;
377 378 379
      } else if (z[1] == '>') {
        *tokenId = TK_NK_ARROW;
        return 2;
H
hzcheng 已提交
380
      }
X
Xiaoyu Wang 已提交
381
      *tokenId = TK_NK_MINUS;
H
hzcheng 已提交
382 383 384
      return 1;
    }
    case '(': {
385
      *tokenId = TK_NK_LP;
H
hzcheng 已提交
386 387 388
      return 1;
    }
    case ')': {
389
      *tokenId = TK_NK_RP;
H
hzcheng 已提交
390 391 392
      return 1;
    }
    case ';': {
393
      *tokenId = TK_NK_SEMI;
H
hzcheng 已提交
394 395 396
      return 1;
    }
    case '+': {
397
      *tokenId = TK_NK_PLUS;
H
hzcheng 已提交
398 399 400
      return 1;
    }
    case '*': {
401
      *tokenId = TK_NK_STAR;
H
hzcheng 已提交
402 403 404 405
      return 1;
    }
    case '/': {
      if (z[1] != '*' || z[2] == 0) {
406
        *tokenId = TK_NK_SLASH;
H
hzcheng 已提交
407 408 409 410 411
        return 1;
      }
      for (i = 3; z[i] && (z[i] != '/' || z[i - 1] != '*'); i++) {
      }
      if (z[i]) i++;
412
      *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
413 414 415
      return i;
    }
    case '%': {
416
      *tokenId = TK_NK_REM;
H
hzcheng 已提交
417 418 419
      return 1;
    }
    case '=': {
420
      *tokenId = TK_NK_EQ;
H
hzcheng 已提交
421 422 423 424
      return 1 + (z[1] == '=');
    }
    case '<': {
      if (z[1] == '=') {
425
        *tokenId = TK_NK_LE;
H
hzcheng 已提交
426 427
        return 2;
      } else if (z[1] == '>') {
428
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
429 430
        return 2;
      } else if (z[1] == '<') {
431
        *tokenId = TK_NK_LSHIFT;
H
hzcheng 已提交
432 433
        return 2;
      } else {
434
        *tokenId = TK_NK_LT;
H
hzcheng 已提交
435 436 437 438 439
        return 1;
      }
    }
    case '>': {
      if (z[1] == '=') {
440
        *tokenId = TK_NK_GE;
H
hzcheng 已提交
441 442
        return 2;
      } else if (z[1] == '>') {
443
        *tokenId = TK_NK_RSHIFT;
H
hzcheng 已提交
444 445
        return 2;
      } else {
446
        *tokenId = TK_NK_GT;
H
hzcheng 已提交
447 448 449 450 451
        return 1;
      }
    }
    case '!': {
      if (z[1] != '=') {
452
        *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
453 454
        return 2;
      } else {
455
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
456 457 458 459 460
        return 2;
      }
    }
    case '|': {
      if (z[1] != '|') {
461
        *tokenId = TK_NK_BITOR;
H
hzcheng 已提交
462 463
        return 1;
      } else {
464
        *tokenId = TK_NK_CONCAT;
H
hzcheng 已提交
465 466 467 468
        return 2;
      }
    }
    case ',': {
469
      *tokenId = TK_NK_COMMA;
H
hzcheng 已提交
470 471 472
      return 1;
    }
    case '&': {
473
      *tokenId = TK_NK_BITAND;
H
hzcheng 已提交
474 475 476
      return 1;
    }
    case '~': {
477
      *tokenId = TK_NK_BITNOT;
H
hzcheng 已提交
478 479
      return 1;
    }
S
slguan 已提交
480
    case '?': {
481
      *tokenId = TK_NK_QUESTION;
S
slguan 已提交
482 483
      return 1;
    }
484
    case '`':
H
hzcheng 已提交
485 486
    case '\'':
    case '"': {
S
slguan 已提交
487 488
      int  delim = z[0];
      bool strEnd = false;
H
hzcheng 已提交
489
      for (i = 1; z[i]; i++) {
490
        if (delim != '`' && z[i] == '\\') {  // ignore the escaped character that follows this backslash
L
[1292]  
lihui 已提交
491 492 493
          i++;
          continue;
        }
X
Xiaoyu Wang 已提交
494

495
        if (z[i] == delim) {
H
hzcheng 已提交
496 497 498
          if (z[i + 1] == delim) {
            i++;
          } else {
H
huili 已提交
499
            strEnd = true;
H
hzcheng 已提交
500 501 502 503
            break;
          }
        }
      }
X
Xiaoyu Wang 已提交
504

H
hzcheng 已提交
505
      if (z[i]) i++;
H
huili 已提交
506

S
slguan 已提交
507
      if (strEnd) {
X
Xiaoyu Wang 已提交
508
        *tokenId = (delim == '`') ? TK_NK_ID : TK_NK_STRING;
S
slguan 已提交
509 510
        return i;
      }
H
huili 已提交
511

S
slguan 已提交
512
      break;
H
hzcheng 已提交
513 514
    }
    case '.': {
S
slguan 已提交
515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531
      /*
       * handle the the float number with out integer part
       * .123
       * .123e4
       */
      if (isdigit(z[1])) {
        for (i = 2; isdigit(z[i]); i++) {
        }

        if ((z[i] == 'e' || z[i] == 'E') &&
            (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
          i += 2;
          while (isdigit(z[i])) {
            i++;
          }
        }

532
        *tokenId = TK_NK_FLOAT;
S
slguan 已提交
533 534
        return i;
      } else {
535
        *tokenId = TK_NK_DOT;
S
slguan 已提交
536 537 538 539 540 541 542
        return 1;
      }
    }

    case '0': {
      char next = z[1];

X
Xiaoyu Wang 已提交
543
      if (next == 'b') {  // bin number
544
        *tokenId = TK_NK_BIN;
S
slguan 已提交
545 546 547 548 549 550 551 552
        for (i = 2; (z[i] == '0' || z[i] == '1'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
X
Xiaoyu Wang 已提交
553
      } else if (next == 'x') {  // hex number
554
        *tokenId = TK_NK_HEX;
S
slguan 已提交
555 556 557 558 559 560 561 562 563
        for (i = 2; isdigit(z[i]) || (z[i] >= 'a' && z[i] <= 'f') || (z[i] >= 'A' && z[i] <= 'F'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
      }
H
hzcheng 已提交
564 565 566 567 568 569 570 571 572 573
    }
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9': {
574
      *tokenId = TK_NK_INTEGER;
H
hzcheng 已提交
575 576 577
      for (i = 1; isdigit(z[i]); i++) {
      }

H
Haojun Liao 已提交
578
      /* here is the 1u/1a/2s/3m/9y */
X
Xiaoyu Wang 已提交
579 580 581
      if ((z[i] == 'b' || z[i] == 'u' || z[i] == 'a' || z[i] == 's' || z[i] == 'm' || z[i] == 'h' || z[i] == 'd' ||
           z[i] == 'n' || z[i] == 'y' || z[i] == 'w' || z[i] == 'B' || z[i] == 'U' || z[i] == 'A' || z[i] == 'S' ||
           z[i] == 'M' || z[i] == 'H' || z[i] == 'D' || z[i] == 'N' || z[i] == 'Y' || z[i] == 'W') &&
H
hjxilinx 已提交
582
          (isIdChar[(uint8_t)z[i + 1]] == 0)) {
583
        *tokenId = TK_NK_VARIABLE;
H
hzcheng 已提交
584 585 586 587 588 589 590 591 592 593
        i += 1;
        return i;
      }

      int32_t seg = 1;
      while (z[i] == '.' && isdigit(z[i + 1])) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
594
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
595 596 597 598
        seg++;
      }

      if (seg == 4) {  // ip address
599
        *tokenId = TK_NK_IPTOKEN;
H
hzcheng 已提交
600
        return i;
X
Xiaoyu Wang 已提交
601 602
      } else if (seg > 2) {
        break;
H
hzcheng 已提交
603 604 605 606 607 608 609 610
      }

      if ((z[i] == 'e' || z[i] == 'E') &&
          (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
611
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
612 613 614
      }
      return i;
    }
X
Xiaoyu Wang 已提交
615 616 617 618 619 620
    // case '[': {
    //   for (i = 1; z[i] && z[i - 1] != ']'; i++) {
    //   }
    //   *tokenId = TK_NK_ID;
    //   return i;
    // }
H
hzcheng 已提交
621 622 623 624
    case 'T':
    case 't':
    case 'F':
    case 'f': {
X
Xiaoyu Wang 已提交
625
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t)z[i]]; i++) {
H
hzcheng 已提交
626 627 628
      }

      if ((i == 4 && strncasecmp(z, "true", 4) == 0) || (i == 5 && strncasecmp(z, "false", 5) == 0)) {
629
        *tokenId = TK_NK_BOOL;
H
hzcheng 已提交
630 631 632 633
        return i;
      }
    }
    default: {
X
Xiaoyu Wang 已提交
634
      if (((*z & 0x80) != 0) || !isIdChar[(uint8_t)*z]) {
H
hzcheng 已提交
635 636
        break;
      }
X
Xiaoyu Wang 已提交
637
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t)z[i]]; i++) {
H
hzcheng 已提交
638
      }
639
      *tokenId = tKeywordCode(z, i);
H
hzcheng 已提交
640 641 642 643
      return i;
    }
  }

644
  *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
645 646 647
  return 0;
}

X
Xiaoyu Wang 已提交
648 649 650
/*
 * Replace one token inside *str with newToken, in a newly allocated string.
 *
 * str       in/out: string that contains the token. On success the old string
 *           is freed and *str points to the new string.
 * token     the token to replace; token->z must point into *str
 * newToken  NUL-terminated replacement text
 *
 * Returns a token whose z/n describe the replacement text inside the new
 * string; all other members are zero. If the allocation fails, *str is left
 * untouched (still valid and still owned by the caller) and a fully zeroed
 * token (z == NULL, n == 0) is returned.
 */
SToken tscReplaceStrToken(char** str, SToken* token, const char* newToken) {
  char*   src = *str;
  size_t  nsize = strlen(newToken);
  int32_t size = (int32_t)strlen(src) - token->n + (int32_t)nsize + 1;
  int32_t bsize = (int32_t)(token->z - src);  // number of bytes that precede the token
  SToken  ntoken = {0};

  char* dst = taosMemoryCalloc(1, size);
  if (dst == NULL) {
    return ntoken;
  }

  // prefix + replacement + everything after the old token (including its NUL)
  memcpy(dst, src, bsize);
  memcpy(dst + bsize, newToken, nsize);
  strcpy(dst + bsize + nsize, token->z + token->n);

  ntoken.n = (uint32_t)nsize;
  ntoken.z = dst + bsize;

  *str = dst;
  taosMemoryFreeClear(src);

  return ntoken;
}

669
SToken tStrGetToken(const char* str, int32_t* i, bool isPrevOptr) {
H
Haojun Liao 已提交
670
  SToken t0 = {0};
S
slguan 已提交
671

H
hzcheng 已提交
672 673
  // here we reach the end of sql string, null-terminated string
  if (str[*i] == 0) {
S
slguan 已提交
674 675
    t0.n = 0;
    return t0;
H
hzcheng 已提交
676 677
  }

678
  // IGNORE TK_NK_SPACE, TK_NK_COMMA, and specified tokens
S
slguan 已提交
679 680 681
  while (1) {
    *i += t0.n;

H
Haojun Liao 已提交
682
    int32_t numOfComma = 0;
X
Xiaoyu Wang 已提交
683
    char    t = str[*i];
H
Haojun Liao 已提交
684 685 686 687
    while (t == ' ' || t == '\n' || t == '\r' || t == '\t' || t == '\f' || t == ',') {
      if (t == ',' && (++numOfComma > 1)) {  // comma only allowed once
        t0.n = 0;
        return t0;
S
slguan 已提交
688
      }
X
Xiaoyu Wang 已提交
689

H
Haojun Liao 已提交
690
      t = str[++(*i)];
S
slguan 已提交
691
    }
H
hzcheng 已提交
692

693
    t0.n = tGetToken(&str[*i], &t0.type);
H
Haojun Liao 已提交
694
    break;
S
slguan 已提交
695

H
Haojun Liao 已提交
696 697
    // not support user specfied ignored symbol list
#if 0
H
Haojun Liao 已提交
698
    bool ignore = false;
S
slguan 已提交
699 700
    for (uint32_t k = 0; k < numOfIgnoreToken; k++) {
      if (t0.type == ignoreTokenTypes[k]) {
H
Haojun Liao 已提交
701
        ignore = true;
S
slguan 已提交
702 703 704 705
        break;
      }
    }

H
Haojun Liao 已提交
706
    if (!ignore) {
S
slguan 已提交
707 708
      break;
    }
H
Haojun Liao 已提交
709
#endif
H
hzcheng 已提交
710 711
  }

712
  if (t0.type == TK_NK_SEMI) {
S
slguan 已提交
713
    t0.n = 0;
D
dapan1121 已提交
714
    t0.type = 0;
S
slguan 已提交
715 716 717 718 719 720 721 722
    return t0;
  }

  uint32_t type = 0;
  int32_t  len;

  // support parse the 'db.tbl' format, notes: There should be no space on either side of the dot!
  if ('.' == str[*i + t0.n]) {
723
    len = tGetToken(&str[*i + t0.n + 1], &type);
S
slguan 已提交
724 725

    // only id and string are valid
726
    if ((TK_NK_STRING != t0.type) && (TK_NK_ID != t0.type)) {
727
      t0.type = TK_NK_ILLEGAL;
S
slguan 已提交
728 729 730 731 732 733 734 735 736
      t0.n = 0;

      return t0;
    }

    t0.n += len + 1;

  } else {
    // support parse the -/+number format
X
Xiaoyu Wang 已提交
737
    if ((isPrevOptr) && (t0.type == TK_NK_MINUS || t0.type == TK_NK_PLUS)) {
738
      len = tGetToken(&str[*i + t0.n], &type);
739
      if (type == TK_NK_INTEGER || type == TK_NK_FLOAT) {
S
slguan 已提交
740 741 742
        t0.type = type;
        t0.n += len;
      }
H
hzcheng 已提交
743 744 745
    }
  }

X
Xiaoyu Wang 已提交
746
  t0.z = (char*)str + (*i);
S
slguan 已提交
747 748 749
  *i += t0.n;

  return t0;
H
hzcheng 已提交
750 751
}

X
Xiaoyu Wang 已提交
752
/*
 * Report whether the word z[0..len-1] is classified by tKeywordCode() as
 * anything other than a plain identifier (TK_NK_ID).
 */
bool taosIsKeyWordToken(const char* z, int32_t len) {
  int32_t code = tKeywordCode(z, len);
  if (code == TK_NK_ID) {
    return false;
  }
  return true;
}
H
Haojun Liao 已提交
753 754

/*
 * Release the keyword hash table.
 *
 * The global pointer is swapped to NULL with a compare-and-exchange, and only
 * the caller whose exchange succeeds frees the table, so concurrent or
 * repeated calls do not free it twice.
 */
void taosCleanupKeywordsTable(void) {
  void* m = keywordHashTable;
  if (m == NULL) {
    return;  // never initialized, or already cleaned up
  }

  if (atomic_val_compare_exchange_ptr(&keywordHashTable, m, 0) == m) {
    taosHashCleanup(m);
  }
}
760

H
Haojun Liao 已提交
761
/*
 * Copy the text of a token into a caller-supplied buffer.
 *
 * pToken  token to copy; must not be NULL
 * buf     destination buffer; must not be NULL
 * len     capacity of buf in bytes; must be greater than pToken->n so that
 *         the terminating NUL fits
 *
 * Returns a copy of *pToken whose z points at buf, which now holds the token
 * text followed by a NUL. The preconditions are checked with assert() only.
 */
SToken taosTokenDup(SToken* pToken, char* buf, int32_t len) {
  // Compare as unsigned only after ruling out non-positive lengths: the token
  // length is assigned uint32_t values in this file, so a negative len would
  // otherwise be converted to a huge unsigned value and pass the check.
  assert(pToken != NULL && buf != NULL && len > 0 && (uint32_t)len > pToken->n);

  strncpy(buf, pToken->z, pToken->n);
  buf[pToken->n] = 0;

  SToken token = *pToken;
  token.z = buf;
  return token;
}