parTokenizer.c 20.5 KB
Newer Older
H
hzcheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

H
hjxilinx 已提交
16
#include "os.h"
X
Xiaoyu Wang 已提交
17
#include "parToken.h"
H
hjxilinx 已提交
18
#include "taosdef.h"
X
Xiaoyu Wang 已提交
19
#include "thash.h"
20
#include "ttokendef.h"
H
hzcheng 已提交
21

S
slguan 已提交
22
// All the keywords of the SQL language are stored in a hash table
H
hzcheng 已提交
23
/*
 * One entry of the SQL keyword table.
 * The field order matters: keywordTable uses positional initializers {name, type}.
 */
typedef struct SKeyword {
  /* keyword text as written in the table */
  const char* name;
  /* token type reported for this keyword */
  uint16_t type;
  /* length of name in bytes; not given in the initializers, so it starts out as 0 */
  uint8_t len;
} SKeyword;

X
Xiaoyu Wang 已提交
29
// clang-format off
S
slguan 已提交
30
// keywords in sql string
H
hzcheng 已提交
31
static SKeyword keywordTable[] = {
X
Xiaoyu Wang 已提交
32 33 34 35 36 37 38 39 40 41 42 43
    {"ACCOUNT",       TK_ACCOUNT},
    {"ACCOUNTS",      TK_ACCOUNTS},
    {"ADD",           TK_ADD},
    {"AGGREGATE",     TK_AGGREGATE},
    {"ALL",           TK_ALL},
    {"ALTER",         TK_ALTER},
    {"ANALYZE",       TK_ANALYZE},
    {"AND",           TK_AND},
    {"APPS",          TK_APPS},
    {"AS",            TK_AS},
    {"ASC",           TK_ASC},
    {"AT_ONCE",       TK_AT_ONCE},
X
Xiaoyu Wang 已提交
44
    {"BALANCE",       TK_BALANCE},
X
Xiaoyu Wang 已提交
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
    {"BETWEEN",       TK_BETWEEN},
    {"BINARY",        TK_BINARY},
    {"BIGINT",        TK_BIGINT},
    {"BNODE",         TK_BNODE},
    {"BNODES",        TK_BNODES},
    {"BOOL",          TK_BOOL},
    {"BUFFER",        TK_BUFFER},
    {"BUFSIZE",       TK_BUFSIZE},
    {"BY",            TK_BY},
    {"CACHE",         TK_CACHE},
    {"CACHELAST",     TK_CACHELAST},
    {"CAST",          TK_CAST},
    {"CLUSTER",       TK_CLUSTER},
    {"COLUMN",        TK_COLUMN},
    {"COMMENT",       TK_COMMENT},
    {"COMP",          TK_COMP},
    {"COMPACT",       TK_COMPACT},
    {"CONNS",         TK_CONNS},
    {"CONNECTION",    TK_CONNECTION},
    {"CONNECTIONS",   TK_CONNECTIONS},
65
    {"CONSUMER",      TK_CONSUMER},
X
Xiaoyu Wang 已提交
66 67
    {"COUNT",         TK_COUNT},
    {"CREATE",        TK_CREATE},
dengyihao's avatar
dengyihao 已提交
68
    {"CONTAINS",      TK_CONTAINS}, 
X
Xiaoyu Wang 已提交
69 70 71 72
    {"DATABASE",      TK_DATABASE},
    {"DATABASES",     TK_DATABASES},
    {"DAYS",          TK_DAYS},
    {"DBS",           TK_DBS},
X
Xiaoyu Wang 已提交
73
    {"DELETE",        TK_DELETE},
X
Xiaoyu Wang 已提交
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
    {"DESC",          TK_DESC},
    {"DESCRIBE",      TK_DESCRIBE},
    {"DISTINCT",      TK_DISTINCT},
    {"DNODE",         TK_DNODE},
    {"DNODES",        TK_DNODES},
    {"DOUBLE",        TK_DOUBLE},
    {"DROP",          TK_DROP},
    {"EXISTS",        TK_EXISTS},
    {"EXPLAIN",       TK_EXPLAIN},
    {"FILE_FACTOR",   TK_FILE_FACTOR},
    {"FILL",          TK_FILL},
    {"FIRST",         TK_FIRST},
    {"FLOAT",         TK_FLOAT},
    {"FROM",          TK_FROM},
    {"FSYNC",         TK_FSYNC},
    {"FUNCTION",      TK_FUNCTION},
    {"FUNCTIONS",     TK_FUNCTIONS},
91
    {"GRANT",         TK_GRANT},
X
Xiaoyu Wang 已提交
92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
    {"GRANTS",        TK_GRANTS},
    {"GROUP",         TK_GROUP},
    {"HAVING",        TK_HAVING},
    {"IF",            TK_IF},
    {"IMPORT",        TK_IMPORT},
    {"IN",            TK_IN},
    {"INDEX",         TK_INDEX},
    {"INDEXES",       TK_INDEXES},
    {"INNER",         TK_INNER},
    {"INT",           TK_INT},
    {"INSERT",        TK_INSERT},
    {"INTEGER",       TK_INTEGER},
    {"INTERVAL",      TK_INTERVAL},
    {"INTO",          TK_INTO},
    {"IS",            TK_IS},
    {"JOIN",          TK_JOIN},
    {"JSON",          TK_JSON},
    {"KEEP",          TK_KEEP},
    {"KILL",          TK_KILL},
    {"LAST",          TK_LAST},
    {"LAST_ROW",      TK_LAST_ROW},
    {"LICENCE",       TK_LICENCE},
    {"LIKE",          TK_LIKE},
    {"LIMIT",         TK_LIMIT},
    {"LINEAR",        TK_LINEAR},
    {"LOCAL",         TK_LOCAL},
    {"MATCH",         TK_MATCH},
    {"MAXROWS",       TK_MAXROWS},
X
Xiaoyu Wang 已提交
120
    {"MERGE",         TK_MERGE},
X
Xiaoyu Wang 已提交
121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
    {"MINROWS",       TK_MINROWS},
    {"MINUS",         TK_MINUS},
    {"MNODE",         TK_MNODE},
    {"MNODES",        TK_MNODES},
    {"MODIFY",        TK_MODIFY},
    {"MODULES",       TK_MODULES},
    {"NCHAR",         TK_NCHAR},
    {"NEXT",          TK_NEXT},
    {"NMATCH",        TK_NMATCH},
    {"NONE",          TK_NONE},
    {"NOT",           TK_NOT},
    {"NOW",           TK_NOW},
    {"NULL",          TK_NULL},
    {"NULLS",         TK_NULLS},
    {"OFFSET",        TK_OFFSET},
    {"ON",            TK_ON},
    {"OR",            TK_OR},
    {"ORDER",         TK_ORDER},
    {"OUTPUTTYPE",    TK_OUTPUTTYPE},
    {"PARTITION",     TK_PARTITION},
    {"PASS",          TK_PASS},
    {"PAGES",         TK_PAGES},
    {"PAGESIZE",      TK_PAGESIZE},
    {"PORT",          TK_PORT},
    {"PPS",           TK_PPS},
    {"PRECISION",     TK_PRECISION},
    {"PRIVILEGE",     TK_PRIVILEGE},
    {"PREV",          TK_PREV},
    {"QNODE",         TK_QNODE},
    {"QNODES",        TK_QNODES},
    {"QTIME",         TK_QTIME},
    {"QUERIES",       TK_QUERIES},
    {"QUERY",         TK_QUERY},
    {"RATIO",         TK_RATIO},
155
    {"READ",          TK_READ},
X
Xiaoyu Wang 已提交
156
    {"REDISTRIBUTE",  TK_REDISTRIBUTE},
157
    {"RENAME",        TK_RENAME},
X
Xiaoyu Wang 已提交
158 159 160
    {"REPLICA",       TK_REPLICA},
    {"RESET",         TK_RESET},
    {"RETENTIONS",    TK_RETENTIONS},
161
    {"REVOKE",        TK_REVOKE},
X
Xiaoyu Wang 已提交
162
    {"ROLLUP",        TK_ROLLUP},
X
Xiaoyu Wang 已提交
163
    {"SCHEMALESS",    TK_SCHEMALESS},
X
Xiaoyu Wang 已提交
164 165 166 167 168
    {"SCORES",        TK_SCORES},
    {"SELECT",        TK_SELECT},
    {"SESSION",       TK_SESSION},
    {"SET",           TK_SET},
    {"SHOW",          TK_SHOW},
169
    {"SINGLE_STABLE", TK_SINGLE_STABLE},
X
Xiaoyu Wang 已提交
170 171 172 173 174 175 176
    {"SLIDING",       TK_SLIDING},
    {"SLIMIT",        TK_SLIMIT},
    {"SMA",           TK_SMA},
    {"SMALLINT",      TK_SMALLINT},
    {"SNODE",         TK_SNODE},
    {"SNODES",        TK_SNODES},
    {"SOFFSET",       TK_SOFFSET},
X
Xiaoyu Wang 已提交
177
    // {"SPLIT",         TK_SPLIT},
X
Xiaoyu Wang 已提交
178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194
    {"STABLE",        TK_STABLE},
    {"STABLES",       TK_STABLES},
    {"STATE",         TK_STATE},
    {"STATE_WINDOW",  TK_STATE_WINDOW},
    {"STORAGE",       TK_STORAGE},
    {"STREAM",        TK_STREAM},
    {"STREAMS",       TK_STREAMS},
    {"STRICT",        TK_STRICT},
    {"SYNCDB",        TK_SYNCDB},
    {"TABLE",         TK_TABLE},
    {"TABLES",        TK_TABLES},
    {"TAG",           TK_TAG},
    {"TAGS",          TK_TAGS},
    {"TBNAME",        TK_TBNAME},
    {"TIMESTAMP",     TK_TIMESTAMP},
    {"TIMEZONE",      TK_TIMEZONE},
    {"TINYINT",       TK_TINYINT},
195
    {"TO",            TK_TO},
X
Xiaoyu Wang 已提交
196 197 198
    {"TODAY",         TK_TODAY},
    {"TOPIC",         TK_TOPIC},
    {"TOPICS",        TK_TOPICS},
199 200
    {"TRANSACTION",   TK_TRANSACTION},
    {"TRANSACTIONS",  TK_TRANSACTIONS},
X
Xiaoyu Wang 已提交
201 202 203 204 205 206 207 208 209 210 211 212 213 214
    {"TRIGGER",       TK_TRIGGER},
    {"TSERIES",       TK_TSERIES},
    {"TTL",           TK_TTL},
    {"UNION",         TK_UNION},
    {"UNSIGNED",      TK_UNSIGNED},
    {"USE",           TK_USE},
    {"USER",          TK_USER},
    {"USERS",         TK_USERS},
    {"USING",         TK_USING},
    {"VALUE",         TK_VALUE},
    {"VALUES",        TK_VALUES},
    {"VARCHAR",       TK_VARCHAR},
    {"VARIABLES",     TK_VARIABLES},
    {"VERBOSE",       TK_VERBOSE},
X
Xiaoyu Wang 已提交
215
    {"VGROUP",        TK_VGROUP},
X
Xiaoyu Wang 已提交
216 217 218 219 220 221
    {"VGROUPS",       TK_VGROUPS},
    {"VNODES",        TK_VNODES},
    {"WAL",           TK_WAL},
    {"WATERMARK",     TK_WATERMARK},
    {"WHERE",         TK_WHERE},
    {"WINDOW_CLOSE",  TK_WINDOW_CLOSE},
222
    {"WRITE",         TK_WRITE},
X
Xiaoyu Wang 已提交
223
    {"_C0",           TK_ROWTS},
X
Xiaoyu Wang 已提交
224 225 226 227 228 229
    {"_QENDTS",       TK_QENDTS},
    {"_QSTARTTS",     TK_QSTARTTS},
    {"_ROWTS",        TK_ROWTS},
    {"_WDURATION",    TK_WDURATION},
    {"_WENDTS",       TK_WENDTS},
    {"_WSTARTTS",     TK_WSTARTTS},
230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292
    // {"ID",           TK_ID},
    // {"STRING",       TK_STRING},
    // {"EQ",           TK_EQ},
    // {"NE",           TK_NE},
    // {"ISNULL",       TK_ISNULL},
    // {"NOTNULL",      TK_NOTNULL},
    // {"GLOB",         TK_GLOB},
    // {"GT",           TK_GT},
    // {"GE",           TK_GE},
    // {"LT",           TK_LT},
    // {"LE",           TK_LE},
    // {"BITAND",       TK_BITAND},
    // {"BITOR",        TK_BITOR},
    // {"LSHIFT",       TK_LSHIFT},
    // {"RSHIFT",       TK_RSHIFT},
    // {"PLUS",         TK_PLUS},
    // {"DIVIDE",       TK_DIVIDE},
    // {"TIMES",        TK_TIMES},
    // {"STAR",         TK_STAR},
    // {"SLASH",        TK_SLASH},
    // {"REM ",         TK_REM},
    // {"||",           TK_CONCAT},
    // {"UMINUS",       TK_UMINUS},
    // {"UPLUS",        TK_UPLUS},
    // {"BITNOT",       TK_BITNOT},
    // {"DOT",          TK_DOT},
    // {"CTIME",        TK_CTIME},
    // {"LP",           TK_LP},
    // {"RP",           TK_RP},
    // {"COMMA",        TK_COMMA},
    // {"EVERY",        TK_EVERY},
    // {"VARIABLE",     TK_VARIABLE},
    // {"UPDATE",       TK_UPDATE},
    // {"CHANGE",       TK_CHANGE},
    // {"COLON",        TK_COLON},
    // {"ABORT",        TK_ABORT},
    // {"AFTER",        TK_AFTER},
    // {"ATTACH",       TK_ATTACH},
    // {"BEFORE",       TK_BEFORE},
    // {"BEGIN",        TK_BEGIN},
    // {"CASCADE",      TK_CASCADE},
    // {"CONFLICT",     TK_CONFLICT},
    // {"COPY",         TK_COPY},
    // {"DEFERRED",     TK_DEFERRED},
    // {"DELIMITERS",   TK_DELIMITERS},
    // {"DETACH",       TK_DETACH},
    // {"EACH",         TK_EACH},
    // {"END",          TK_END},
    // {"FAIL",         TK_FAIL},
    // {"FOR",          TK_FOR},
    // {"IGNORE",       TK_IGNORE},
    // {"IMMEDIATE",    TK_IMMEDIATE},
    // {"INITIALLY",    TK_INITIALLY},
    // {"INSTEAD",      TK_INSTEAD},
    // {"KEY",          TK_KEY},
    // {"OF",           TK_OF},
    // {"RAISE",        TK_RAISE},
    // {"REPLACE",      TK_REPLACE},
    // {"RESTRICT",     TK_RESTRICT},
    // {"ROW",          TK_ROW},
    // {"STATEMENT",    TK_STATEMENT},
    // {"VIEW",         TK_VIEW},
    // {"SEMI",         TK_SEMI},
293
    // {"PARTITIONS",   TK_PARTITIONS},
294
    // {"MODE",         TK_MODE},
H
hzcheng 已提交
295
};
X
Xiaoyu Wang 已提交
296
// clang-format on
H
hzcheng 已提交
297 298 299 300 301 302 303 304 305 306 307 308 309

/*
 * Lookup table indexed by a 7-bit ASCII code: an entry is 1 when the character may be part of an
 * identifier (digits '0'-'9', letters 'A'-'Z' and 'a'-'z', and '_' at 0x5F) and 0 otherwise.
 * The table has only 128 entries (rows 0x..7x), so it must not be indexed with a byte >= 0x80.
 */
static const char isIdChar[] = {
    /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
};

H
Haojun Liao 已提交
310
// Hash table mapping an upper-case keyword name to a pointer to its keywordTable entry.
// NULL until doInitKeywordsTable() has run, and NULL again after taosCleanupKeywordsTable().
static void* keywordHashTable = NULL;
H
hzcheng 已提交
311

S
TD-1057  
Shengliang Guan 已提交
312
static void doInitKeywordsTable(void) {
313
  int numOfEntries = tListLen(keywordTable);
X
Xiaoyu Wang 已提交
314

H
Haojun Liao 已提交
315
  keywordHashTable = taosHashInit(numOfEntries, MurmurHash3_32, true, false);
316
  for (int32_t i = 0; i < numOfEntries; i++) {
S
TD-1057  
Shengliang Guan 已提交
317
    keywordTable[i].len = (uint8_t)strlen(keywordTable[i].name);
318
    void* ptr = &keywordTable[i];
H
Haojun Liao 已提交
319
    taosHashPut(keywordHashTable, keywordTable[i].name, keywordTable[i].len, (void*)&ptr, POINTER_BYTES);
H
hzcheng 已提交
320
  }
321 322
}

wafwerar's avatar
wafwerar 已提交
323
// Once-control passed to taosThreadOnce() in tKeywordCode() together with doInitKeywordsTable().
static TdThreadOnce keywordsHashTableInit = PTHREAD_ONCE_INIT;
324

325
static int32_t tKeywordCode(const char* z, int n) {
wafwerar's avatar
wafwerar 已提交
326
  taosThreadOnce(&keywordsHashTableInit, doInitKeywordsTable);
X
Xiaoyu Wang 已提交
327

H
hjxilinx 已提交
328
  char key[512] = {0};
X
Xiaoyu Wang 已提交
329
  if (n > tListLen(key)) {  // too long token, can not be any other token type
330
    return TK_NK_ID;
331
  }
X
Xiaoyu Wang 已提交
332

H
hzcheng 已提交
333 334
  for (int32_t j = 0; j < n; ++j) {
    if (z[j] >= 'a' && z[j] <= 'z') {
335
      key[j] = (char)(z[j] & 0xDF);  // to uppercase and set the null-terminated
H
hzcheng 已提交
336 337 338 339 340
    } else {
      key[j] = z[j];
    }
  }

D
dapan1121 已提交
341
  if (keywordHashTable == NULL) {
342
    return TK_NK_ILLEGAL;
D
dapan1121 已提交
343
  }
H
Haojun Liao 已提交
344

H
Haojun Liao 已提交
345
  SKeyword** pKey = (SKeyword**)taosHashGet(keywordHashTable, key, n);
X
Xiaoyu Wang 已提交
346
  return (pKey != NULL) ? (*pKey)->type : TK_NK_ID;
H
hzcheng 已提交
347 348
}

H
huili 已提交
349
/*
350 351 352
 * Return the length of the token that begins at z[0].
 * Store the token type in *type before returning.
 */
353
uint32_t tGetToken(const char* z, uint32_t* tokenId) {
354
  uint32_t i;
H
hzcheng 已提交
355 356 357 358 359 360 361 362
  switch (*z) {
    case ' ':
    case '\t':
    case '\n':
    case '\f':
    case '\r': {
      for (i = 1; isspace(z[i]); i++) {
      }
363
      *tokenId = TK_NK_SPACE;
H
hzcheng 已提交
364 365 366
      return i;
    }
    case ':': {
367
      *tokenId = TK_NK_COLON;
H
hzcheng 已提交
368 369 370 371 372 373
      return 1;
    }
    case '-': {
      if (z[1] == '-') {
        for (i = 2; z[i] && z[i] != '\n'; i++) {
        }
374
        *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
375
        return i;
376 377 378
      } else if (z[1] == '>') {
        *tokenId = TK_NK_ARROW;
        return 2;
H
hzcheng 已提交
379
      }
X
Xiaoyu Wang 已提交
380
      *tokenId = TK_NK_MINUS;
H
hzcheng 已提交
381 382 383
      return 1;
    }
    case '(': {
384
      *tokenId = TK_NK_LP;
H
hzcheng 已提交
385 386 387
      return 1;
    }
    case ')': {
388
      *tokenId = TK_NK_RP;
H
hzcheng 已提交
389 390 391
      return 1;
    }
    case ';': {
392
      *tokenId = TK_NK_SEMI;
H
hzcheng 已提交
393 394 395
      return 1;
    }
    case '+': {
396
      *tokenId = TK_NK_PLUS;
H
hzcheng 已提交
397 398 399
      return 1;
    }
    case '*': {
400
      *tokenId = TK_NK_STAR;
H
hzcheng 已提交
401 402 403 404
      return 1;
    }
    case '/': {
      if (z[1] != '*' || z[2] == 0) {
405
        *tokenId = TK_NK_SLASH;
H
hzcheng 已提交
406 407 408 409 410
        return 1;
      }
      for (i = 3; z[i] && (z[i] != '/' || z[i - 1] != '*'); i++) {
      }
      if (z[i]) i++;
411
      *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
412 413 414
      return i;
    }
    case '%': {
415
      *tokenId = TK_NK_REM;
H
hzcheng 已提交
416 417 418
      return 1;
    }
    case '=': {
419
      *tokenId = TK_NK_EQ;
H
hzcheng 已提交
420 421 422 423
      return 1 + (z[1] == '=');
    }
    case '<': {
      if (z[1] == '=') {
424
        *tokenId = TK_NK_LE;
H
hzcheng 已提交
425 426
        return 2;
      } else if (z[1] == '>') {
427
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
428 429
        return 2;
      } else if (z[1] == '<') {
430
        *tokenId = TK_NK_LSHIFT;
H
hzcheng 已提交
431 432
        return 2;
      } else {
433
        *tokenId = TK_NK_LT;
H
hzcheng 已提交
434 435 436 437 438
        return 1;
      }
    }
    case '>': {
      if (z[1] == '=') {
439
        *tokenId = TK_NK_GE;
H
hzcheng 已提交
440 441
        return 2;
      } else if (z[1] == '>') {
442
        *tokenId = TK_NK_RSHIFT;
H
hzcheng 已提交
443 444
        return 2;
      } else {
445
        *tokenId = TK_NK_GT;
H
hzcheng 已提交
446 447 448 449 450
        return 1;
      }
    }
    case '!': {
      if (z[1] != '=') {
451
        *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
452 453
        return 2;
      } else {
454
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
455 456 457 458 459
        return 2;
      }
    }
    case '|': {
      if (z[1] != '|') {
460
        *tokenId = TK_NK_BITOR;
H
hzcheng 已提交
461 462
        return 1;
      } else {
463
        *tokenId = TK_NK_CONCAT;
H
hzcheng 已提交
464 465 466 467
        return 2;
      }
    }
    case ',': {
468
      *tokenId = TK_NK_COMMA;
H
hzcheng 已提交
469 470 471
      return 1;
    }
    case '&': {
472
      *tokenId = TK_NK_BITAND;
H
hzcheng 已提交
473 474 475
      return 1;
    }
    case '~': {
476
      *tokenId = TK_NK_BITNOT;
H
hzcheng 已提交
477 478
      return 1;
    }
S
slguan 已提交
479
    case '?': {
480
      *tokenId = TK_NK_QUESTION;
S
slguan 已提交
481 482
      return 1;
    }
483
    case '`':
H
hzcheng 已提交
484 485
    case '\'':
    case '"': {
S
slguan 已提交
486 487
      int  delim = z[0];
      bool strEnd = false;
H
hzcheng 已提交
488
      for (i = 1; z[i]; i++) {
489
        if (delim != '`' && z[i] == '\\') {  // ignore the escaped character that follows this backslash
L
[1292]  
lihui 已提交
490 491 492
          i++;
          continue;
        }
X
Xiaoyu Wang 已提交
493

494
        if (z[i] == delim) {
H
hzcheng 已提交
495 496 497
          if (z[i + 1] == delim) {
            i++;
          } else {
H
huili 已提交
498
            strEnd = true;
H
hzcheng 已提交
499 500 501 502
            break;
          }
        }
      }
X
Xiaoyu Wang 已提交
503

H
hzcheng 已提交
504
      if (z[i]) i++;
H
huili 已提交
505

S
slguan 已提交
506
      if (strEnd) {
X
Xiaoyu Wang 已提交
507
        *tokenId = (delim == '`') ? TK_NK_ID : TK_NK_STRING;
S
slguan 已提交
508 509
        return i;
      }
H
huili 已提交
510

S
slguan 已提交
511
      break;
H
hzcheng 已提交
512 513
    }
    case '.': {
S
slguan 已提交
514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530
      /*
       * handle the the float number with out integer part
       * .123
       * .123e4
       */
      if (isdigit(z[1])) {
        for (i = 2; isdigit(z[i]); i++) {
        }

        if ((z[i] == 'e' || z[i] == 'E') &&
            (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
          i += 2;
          while (isdigit(z[i])) {
            i++;
          }
        }

531
        *tokenId = TK_NK_FLOAT;
S
slguan 已提交
532 533
        return i;
      } else {
534
        *tokenId = TK_NK_DOT;
S
slguan 已提交
535 536 537 538 539 540 541
        return 1;
      }
    }

    case '0': {
      char next = z[1];

X
Xiaoyu Wang 已提交
542
      if (next == 'b') {  // bin number
543
        *tokenId = TK_NK_BIN;
S
slguan 已提交
544 545 546 547 548 549 550 551
        for (i = 2; (z[i] == '0' || z[i] == '1'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
X
Xiaoyu Wang 已提交
552
      } else if (next == 'x') {  // hex number
553
        *tokenId = TK_NK_HEX;
S
slguan 已提交
554 555 556 557 558 559 560 561 562
        for (i = 2; isdigit(z[i]) || (z[i] >= 'a' && z[i] <= 'f') || (z[i] >= 'A' && z[i] <= 'F'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
      }
H
hzcheng 已提交
563 564 565 566 567 568 569 570 571 572
    }
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9': {
573
      *tokenId = TK_NK_INTEGER;
H
hzcheng 已提交
574 575 576
      for (i = 1; isdigit(z[i]); i++) {
      }

H
Haojun Liao 已提交
577
      /* here is the 1u/1a/2s/3m/9y */
X
Xiaoyu Wang 已提交
578 579 580
      if ((z[i] == 'b' || z[i] == 'u' || z[i] == 'a' || z[i] == 's' || z[i] == 'm' || z[i] == 'h' || z[i] == 'd' ||
           z[i] == 'n' || z[i] == 'y' || z[i] == 'w' || z[i] == 'B' || z[i] == 'U' || z[i] == 'A' || z[i] == 'S' ||
           z[i] == 'M' || z[i] == 'H' || z[i] == 'D' || z[i] == 'N' || z[i] == 'Y' || z[i] == 'W') &&
H
hjxilinx 已提交
581
          (isIdChar[(uint8_t)z[i + 1]] == 0)) {
582
        *tokenId = TK_NK_VARIABLE;
H
hzcheng 已提交
583 584 585 586 587 588 589 590 591 592
        i += 1;
        return i;
      }

      int32_t seg = 1;
      while (z[i] == '.' && isdigit(z[i + 1])) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
593
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
594 595 596 597
        seg++;
      }

      if (seg == 4) {  // ip address
598
        *tokenId = TK_NK_IPTOKEN;
H
hzcheng 已提交
599
        return i;
X
Xiaoyu Wang 已提交
600 601
      } else if (seg > 2) {
        break;
H
hzcheng 已提交
602 603 604 605 606 607 608 609
      }

      if ((z[i] == 'e' || z[i] == 'E') &&
          (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
610
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
611 612 613
      }
      return i;
    }
X
Xiaoyu Wang 已提交
614 615 616 617 618 619
    // case '[': {
    //   for (i = 1; z[i] && z[i - 1] != ']'; i++) {
    //   }
    //   *tokenId = TK_NK_ID;
    //   return i;
    // }
H
hzcheng 已提交
620 621 622 623
    case 'T':
    case 't':
    case 'F':
    case 'f': {
X
Xiaoyu Wang 已提交
624
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t)z[i]]; i++) {
H
hzcheng 已提交
625 626 627
      }

      if ((i == 4 && strncasecmp(z, "true", 4) == 0) || (i == 5 && strncasecmp(z, "false", 5) == 0)) {
628
        *tokenId = TK_NK_BOOL;
H
hzcheng 已提交
629 630 631 632
        return i;
      }
    }
    default: {
X
Xiaoyu Wang 已提交
633
      if (((*z & 0x80) != 0) || !isIdChar[(uint8_t)*z]) {
H
hzcheng 已提交
634 635
        break;
      }
X
Xiaoyu Wang 已提交
636
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t)z[i]]; i++) {
H
hzcheng 已提交
637
      }
638
      *tokenId = tKeywordCode(z, i);
H
hzcheng 已提交
639 640 641 642
      return i;
    }
  }

643
  *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
644 645 646
  return 0;
}

X
Xiaoyu Wang 已提交
647 648 649
/*
 * Replace one token inside the string *str by newToken.
 *
 * str       in/out: the string that contains the token. On success the old buffer is released
 *           and *str points to a newly allocated, NUL-terminated string owned by the caller.
 * token     the token to replace; token->z must point into *str
 * newToken  NUL-terminated replacement text
 *
 * Returns a token describing the replacement text inside the new string (z and n are set, every
 * other member is zero). If the allocation fails, *str is left untouched and a zeroed token
 * (z == NULL, n == 0) is returned.
 */
SToken tscReplaceStrToken(char** str, SToken* token, const char* newToken) {
  SToken ntoken = {0};

  char*       src = *str;
  const char* tail = token->z + token->n;      // text that follows the replaced token
  size_t      bsize = (size_t)(token->z - src);  // number of bytes in front of the token
  size_t      nsize = strlen(newToken);
  size_t      tailLen = strlen(tail);

  // calloc zero-fills the buffer, so the extra byte is the string terminator
  char* dst = taosMemoryCalloc(1, bsize + nsize + tailLen + 1);
  if (dst == NULL) {
    return ntoken;
  }

  memcpy(dst, src, bsize);
  memcpy(dst + bsize, newToken, nsize);
  memcpy(dst + bsize + nsize, tail, tailLen);

  ntoken.n = (uint32_t)nsize;
  ntoken.z = dst + bsize;

  // publish the new string only after it is complete, then release the old one
  *str = dst;
  taosMemoryFreeClear(src);

  return ntoken;
}

668
SToken tStrGetToken(const char* str, int32_t* i, bool isPrevOptr) {
H
Haojun Liao 已提交
669
  SToken t0 = {0};
S
slguan 已提交
670

H
hzcheng 已提交
671 672
  // here we reach the end of sql string, null-terminated string
  if (str[*i] == 0) {
S
slguan 已提交
673 674
    t0.n = 0;
    return t0;
H
hzcheng 已提交
675 676
  }

677
  // IGNORE TK_NK_SPACE, TK_NK_COMMA, and specified tokens
S
slguan 已提交
678 679 680
  while (1) {
    *i += t0.n;

H
Haojun Liao 已提交
681
    int32_t numOfComma = 0;
X
Xiaoyu Wang 已提交
682
    char    t = str[*i];
H
Haojun Liao 已提交
683 684 685 686
    while (t == ' ' || t == '\n' || t == '\r' || t == '\t' || t == '\f' || t == ',') {
      if (t == ',' && (++numOfComma > 1)) {  // comma only allowed once
        t0.n = 0;
        return t0;
S
slguan 已提交
687
      }
X
Xiaoyu Wang 已提交
688

H
Haojun Liao 已提交
689
      t = str[++(*i)];
S
slguan 已提交
690
    }
H
hzcheng 已提交
691

692
    t0.n = tGetToken(&str[*i], &t0.type);
H
Haojun Liao 已提交
693
    break;
S
slguan 已提交
694

H
Haojun Liao 已提交
695 696
    // not support user specfied ignored symbol list
#if 0
H
Haojun Liao 已提交
697
    bool ignore = false;
S
slguan 已提交
698 699
    for (uint32_t k = 0; k < numOfIgnoreToken; k++) {
      if (t0.type == ignoreTokenTypes[k]) {
H
Haojun Liao 已提交
700
        ignore = true;
S
slguan 已提交
701 702 703 704
        break;
      }
    }

H
Haojun Liao 已提交
705
    if (!ignore) {
S
slguan 已提交
706 707
      break;
    }
H
Haojun Liao 已提交
708
#endif
H
hzcheng 已提交
709 710
  }

711
  if (t0.type == TK_NK_SEMI) {
S
slguan 已提交
712
    t0.n = 0;
D
dapan1121 已提交
713
    t0.type = 0;
S
slguan 已提交
714 715 716 717 718 719 720 721
    return t0;
  }

  uint32_t type = 0;
  int32_t  len;

  // support parse the 'db.tbl' format, notes: There should be no space on either side of the dot!
  if ('.' == str[*i + t0.n]) {
722
    len = tGetToken(&str[*i + t0.n + 1], &type);
S
slguan 已提交
723 724

    // only id and string are valid
725
    if ((TK_NK_STRING != t0.type) && (TK_NK_ID != t0.type)) {
726
      t0.type = TK_NK_ILLEGAL;
S
slguan 已提交
727 728 729 730 731 732 733 734 735
      t0.n = 0;

      return t0;
    }

    t0.n += len + 1;

  } else {
    // support parse the -/+number format
X
Xiaoyu Wang 已提交
736
    if ((isPrevOptr) && (t0.type == TK_NK_MINUS || t0.type == TK_NK_PLUS)) {
737
      len = tGetToken(&str[*i + t0.n], &type);
738
      if (type == TK_NK_INTEGER || type == TK_NK_FLOAT) {
S
slguan 已提交
739 740 741
        t0.type = type;
        t0.n += len;
      }
H
hzcheng 已提交
742 743 744
    }
  }

X
Xiaoyu Wang 已提交
745
  t0.z = (char*)str + (*i);
S
slguan 已提交
746 747 748
  *i += t0.n;

  return t0;
H
hzcheng 已提交
749 750
}

X
Xiaoyu Wang 已提交
751
/* Report whether tKeywordCode() classifies the len bytes at z as anything other than TK_NK_ID. */
bool taosIsKeyWordToken(const char* z, int32_t len) {
  int32_t code = tKeywordCode(z, len);
  return code != TK_NK_ID;
}
H
Haojun Liao 已提交
752 753

/*
 * Release the keyword hash table.
 * The table pointer is swapped to 0 with a compare-and-exchange, and taosHashCleanup() runs only
 * when that exchange returned the pointer read here.
 */
void taosCleanupKeywordsTable() {
  void* pTable = keywordHashTable;
  if (pTable == NULL) {
    return;  // never built, or already cleaned up
  }

  if (atomic_val_compare_exchange_ptr(&keywordHashTable, pTable, 0) != pTable) {
    return;  // the pointer changed in the meantime, so nothing to free here
  }

  taosHashCleanup(pTable);
}
759

H
Haojun Liao 已提交
760
/*
 * Copy the text of a token into a caller supplied buffer.
 *
 * pToken  the token to duplicate
 * buf     destination buffer
 * len     size of buf; it must be larger than pToken->n so that the terminator fits
 *
 * Returns a copy of *pToken whose z points to buf, which holds the NUL-terminated token text.
 */
SToken taosTokenDup(SToken* pToken, char* buf, int32_t len) {
  assert(pToken != NULL && buf != NULL && len > pToken->n);

  // copy the token bytes, then terminate the copy
  strncpy(buf, pToken->z, pToken->n);
  buf[pToken->n] = '\0';

  SToken copy = *pToken;
  copy.z = buf;
  return copy;
}