parTokenizer.c 20.4 KB
Newer Older
H
hzcheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

H
hjxilinx 已提交
16
#include "os.h"
X
Xiaoyu Wang 已提交
17
#include "parToken.h"
H
hjxilinx 已提交
18
#include "taosdef.h"
X
Xiaoyu Wang 已提交
19
#include "thash.h"
20
#include "ttokendef.h"
H
hzcheng 已提交
21

S
slguan 已提交
22
// All the keywords of the SQL language are stored in a hash table
H
hzcheng 已提交
23
// One row of the SQL keyword table.
typedef struct SKeyword {
  const char* name;  // keyword text exactly as written in keywordTable
  uint16_t    type;  // token code (TK_* value) reported for this keyword
  uint8_t     len;   // strlen(name); computed by doInitKeywordsTable()
} SKeyword;

X
Xiaoyu Wang 已提交
29
// clang-format off
S
slguan 已提交
30
// keywords in sql string
H
hzcheng 已提交
31
static SKeyword keywordTable[] = {
X
Xiaoyu Wang 已提交
32 33 34 35 36 37 38 39 40 41 42 43
    {"ACCOUNT",       TK_ACCOUNT},
    {"ACCOUNTS",      TK_ACCOUNTS},
    {"ADD",           TK_ADD},
    {"AGGREGATE",     TK_AGGREGATE},
    {"ALL",           TK_ALL},
    {"ALTER",         TK_ALTER},
    {"ANALYZE",       TK_ANALYZE},
    {"AND",           TK_AND},
    {"APPS",          TK_APPS},
    {"AS",            TK_AS},
    {"ASC",           TK_ASC},
    {"AT_ONCE",       TK_AT_ONCE},
X
Xiaoyu Wang 已提交
44
    {"BALANCE",       TK_BALANCE},
X
Xiaoyu Wang 已提交
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
    {"BETWEEN",       TK_BETWEEN},
    {"BINARY",        TK_BINARY},
    {"BIGINT",        TK_BIGINT},
    {"BNODE",         TK_BNODE},
    {"BNODES",        TK_BNODES},
    {"BOOL",          TK_BOOL},
    {"BUFFER",        TK_BUFFER},
    {"BUFSIZE",       TK_BUFSIZE},
    {"BY",            TK_BY},
    {"CACHE",         TK_CACHE},
    {"CACHELAST",     TK_CACHELAST},
    {"CAST",          TK_CAST},
    {"CLUSTER",       TK_CLUSTER},
    {"COLUMN",        TK_COLUMN},
    {"COMMENT",       TK_COMMENT},
    {"COMP",          TK_COMP},
    {"COMPACT",       TK_COMPACT},
    {"CONNS",         TK_CONNS},
    {"CONNECTION",    TK_CONNECTION},
    {"CONNECTIONS",   TK_CONNECTIONS},
65
    {"CONSUMER",      TK_CONSUMER},
X
Xiaoyu Wang 已提交
66 67
    {"COUNT",         TK_COUNT},
    {"CREATE",        TK_CREATE},
dengyihao's avatar
dengyihao 已提交
68
    {"CONTAINS",      TK_CONTAINS}, 
X
Xiaoyu Wang 已提交
69 70 71 72
    {"DATABASE",      TK_DATABASE},
    {"DATABASES",     TK_DATABASES},
    {"DAYS",          TK_DAYS},
    {"DBS",           TK_DBS},
X
Xiaoyu Wang 已提交
73
    {"DELETE",        TK_DELETE},
X
Xiaoyu Wang 已提交
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
    {"DESC",          TK_DESC},
    {"DESCRIBE",      TK_DESCRIBE},
    {"DISTINCT",      TK_DISTINCT},
    {"DNODE",         TK_DNODE},
    {"DNODES",        TK_DNODES},
    {"DOUBLE",        TK_DOUBLE},
    {"DROP",          TK_DROP},
    {"EXISTS",        TK_EXISTS},
    {"EXPLAIN",       TK_EXPLAIN},
    {"FILE_FACTOR",   TK_FILE_FACTOR},
    {"FILL",          TK_FILL},
    {"FIRST",         TK_FIRST},
    {"FLOAT",         TK_FLOAT},
    {"FROM",          TK_FROM},
    {"FSYNC",         TK_FSYNC},
    {"FUNCTION",      TK_FUNCTION},
    {"FUNCTIONS",     TK_FUNCTIONS},
91
    {"GRANT",         TK_GRANT},
X
Xiaoyu Wang 已提交
92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
    {"GRANTS",        TK_GRANTS},
    {"GROUP",         TK_GROUP},
    {"HAVING",        TK_HAVING},
    {"IF",            TK_IF},
    {"IMPORT",        TK_IMPORT},
    {"IN",            TK_IN},
    {"INDEX",         TK_INDEX},
    {"INDEXES",       TK_INDEXES},
    {"INNER",         TK_INNER},
    {"INT",           TK_INT},
    {"INSERT",        TK_INSERT},
    {"INTEGER",       TK_INTEGER},
    {"INTERVAL",      TK_INTERVAL},
    {"INTO",          TK_INTO},
    {"IS",            TK_IS},
    {"JOIN",          TK_JOIN},
    {"JSON",          TK_JSON},
    {"KEEP",          TK_KEEP},
    {"KILL",          TK_KILL},
    {"LAST",          TK_LAST},
    {"LAST_ROW",      TK_LAST_ROW},
    {"LICENCE",       TK_LICENCE},
    {"LIKE",          TK_LIKE},
    {"LIMIT",         TK_LIMIT},
    {"LINEAR",        TK_LINEAR},
    {"LOCAL",         TK_LOCAL},
    {"MATCH",         TK_MATCH},
    {"MAXROWS",       TK_MAXROWS},
    {"MINROWS",       TK_MINROWS},
    {"MINUS",         TK_MINUS},
    {"MNODE",         TK_MNODE},
    {"MNODES",        TK_MNODES},
    {"MODIFY",        TK_MODIFY},
    {"MODULES",       TK_MODULES},
    {"NCHAR",         TK_NCHAR},
    {"NEXT",          TK_NEXT},
    {"NMATCH",        TK_NMATCH},
    {"NONE",          TK_NONE},
    {"NOT",           TK_NOT},
    {"NOW",           TK_NOW},
    {"NULL",          TK_NULL},
    {"NULLS",         TK_NULLS},
    {"OFFSET",        TK_OFFSET},
    {"ON",            TK_ON},
    {"OR",            TK_OR},
    {"ORDER",         TK_ORDER},
    {"OUTPUTTYPE",    TK_OUTPUTTYPE},
    {"PARTITION",     TK_PARTITION},
    {"PASS",          TK_PASS},
    {"PAGES",         TK_PAGES},
    {"PAGESIZE",      TK_PAGESIZE},
    {"PORT",          TK_PORT},
    {"PPS",           TK_PPS},
    {"PRECISION",     TK_PRECISION},
    {"PRIVILEGE",     TK_PRIVILEGE},
    {"PREV",          TK_PREV},
    {"QNODE",         TK_QNODE},
    {"QNODES",        TK_QNODES},
    {"QTIME",         TK_QTIME},
    {"QUERIES",       TK_QUERIES},
    {"QUERY",         TK_QUERY},
    {"RATIO",         TK_RATIO},
154 155
    {"READ",          TK_READ},
    {"RENAME",        TK_RENAME},
X
Xiaoyu Wang 已提交
156 157 158
    {"REPLICA",       TK_REPLICA},
    {"RESET",         TK_RESET},
    {"RETENTIONS",    TK_RETENTIONS},
159
    {"REVOKE",        TK_REVOKE},
X
Xiaoyu Wang 已提交
160
    {"ROLLUP",        TK_ROLLUP},
X
Xiaoyu Wang 已提交
161
    {"SCHEMALESS",    TK_SCHEMALESS},
X
Xiaoyu Wang 已提交
162 163 164 165 166
    {"SCORES",        TK_SCORES},
    {"SELECT",        TK_SELECT},
    {"SESSION",       TK_SESSION},
    {"SET",           TK_SET},
    {"SHOW",          TK_SHOW},
167
    {"SINGLE_STABLE", TK_SINGLE_STABLE},
X
Xiaoyu Wang 已提交
168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191
    {"SLIDING",       TK_SLIDING},
    {"SLIMIT",        TK_SLIMIT},
    {"SMA",           TK_SMA},
    {"SMALLINT",      TK_SMALLINT},
    {"SNODE",         TK_SNODE},
    {"SNODES",        TK_SNODES},
    {"SOFFSET",       TK_SOFFSET},
    {"STABLE",        TK_STABLE},
    {"STABLES",       TK_STABLES},
    {"STATE",         TK_STATE},
    {"STATE_WINDOW",  TK_STATE_WINDOW},
    {"STORAGE",       TK_STORAGE},
    {"STREAM",        TK_STREAM},
    {"STREAMS",       TK_STREAMS},
    {"STRICT",        TK_STRICT},
    {"SYNCDB",        TK_SYNCDB},
    {"TABLE",         TK_TABLE},
    {"TABLES",        TK_TABLES},
    {"TAG",           TK_TAG},
    {"TAGS",          TK_TAGS},
    {"TBNAME",        TK_TBNAME},
    {"TIMESTAMP",     TK_TIMESTAMP},
    {"TIMEZONE",      TK_TIMEZONE},
    {"TINYINT",       TK_TINYINT},
192
    {"TO",            TK_TO},
X
Xiaoyu Wang 已提交
193 194 195
    {"TODAY",         TK_TODAY},
    {"TOPIC",         TK_TOPIC},
    {"TOPICS",        TK_TOPICS},
196 197
    {"TRANSACTION",   TK_TRANSACTION},
    {"TRANSACTIONS",  TK_TRANSACTIONS},
X
Xiaoyu Wang 已提交
198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217
    {"TRIGGER",       TK_TRIGGER},
    {"TSERIES",       TK_TSERIES},
    {"TTL",           TK_TTL},
    {"UNION",         TK_UNION},
    {"UNSIGNED",      TK_UNSIGNED},
    {"USE",           TK_USE},
    {"USER",          TK_USER},
    {"USERS",         TK_USERS},
    {"USING",         TK_USING},
    {"VALUE",         TK_VALUE},
    {"VALUES",        TK_VALUES},
    {"VARCHAR",       TK_VARCHAR},
    {"VARIABLES",     TK_VARIABLES},
    {"VERBOSE",       TK_VERBOSE},
    {"VGROUPS",       TK_VGROUPS},
    {"VNODES",        TK_VNODES},
    {"WAL",           TK_WAL},
    {"WATERMARK",     TK_WATERMARK},
    {"WHERE",         TK_WHERE},
    {"WINDOW_CLOSE",  TK_WINDOW_CLOSE},
218
    {"WRITE",         TK_WRITE},
X
Xiaoyu Wang 已提交
219
    {"_C0",           TK_ROWTS},
X
Xiaoyu Wang 已提交
220 221 222 223 224 225
    {"_QENDTS",       TK_QENDTS},
    {"_QSTARTTS",     TK_QSTARTTS},
    {"_ROWTS",        TK_ROWTS},
    {"_WDURATION",    TK_WDURATION},
    {"_WENDTS",       TK_WENDTS},
    {"_WSTARTTS",     TK_WSTARTTS},
226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
    // {"ID",           TK_ID},
    // {"STRING",       TK_STRING},
    // {"EQ",           TK_EQ},
    // {"NE",           TK_NE},
    // {"ISNULL",       TK_ISNULL},
    // {"NOTNULL",      TK_NOTNULL},
    // {"GLOB",         TK_GLOB},
    // {"GT",           TK_GT},
    // {"GE",           TK_GE},
    // {"LT",           TK_LT},
    // {"LE",           TK_LE},
    // {"BITAND",       TK_BITAND},
    // {"BITOR",        TK_BITOR},
    // {"LSHIFT",       TK_LSHIFT},
    // {"RSHIFT",       TK_RSHIFT},
    // {"PLUS",         TK_PLUS},
    // {"DIVIDE",       TK_DIVIDE},
    // {"TIMES",        TK_TIMES},
    // {"STAR",         TK_STAR},
    // {"SLASH",        TK_SLASH},
    // {"REM ",         TK_REM},
    // {"||",           TK_CONCAT},
    // {"UMINUS",       TK_UMINUS},
    // {"UPLUS",        TK_UPLUS},
    // {"BITNOT",       TK_BITNOT},
    // {"DOT",          TK_DOT},
    // {"CTIME",        TK_CTIME},
    // {"LP",           TK_LP},
    // {"RP",           TK_RP},
    // {"COMMA",        TK_COMMA},
    // {"EVERY",        TK_EVERY},
    // {"VARIABLE",     TK_VARIABLE},
    // {"UPDATE",       TK_UPDATE},
    // {"CHANGE",       TK_CHANGE},
    // {"COLON",        TK_COLON},
    // {"ABORT",        TK_ABORT},
    // {"AFTER",        TK_AFTER},
    // {"ATTACH",       TK_ATTACH},
    // {"BEFORE",       TK_BEFORE},
    // {"BEGIN",        TK_BEGIN},
    // {"CASCADE",      TK_CASCADE},
    // {"CONFLICT",     TK_CONFLICT},
    // {"COPY",         TK_COPY},
    // {"DEFERRED",     TK_DEFERRED},
    // {"DELIMITERS",   TK_DELIMITERS},
    // {"DETACH",       TK_DETACH},
    // {"EACH",         TK_EACH},
    // {"END",          TK_END},
    // {"FAIL",         TK_FAIL},
    // {"FOR",          TK_FOR},
    // {"IGNORE",       TK_IGNORE},
    // {"IMMEDIATE",    TK_IMMEDIATE},
    // {"INITIALLY",    TK_INITIALLY},
    // {"INSTEAD",      TK_INSTEAD},
    // {"KEY",          TK_KEY},
    // {"OF",           TK_OF},
    // {"RAISE",        TK_RAISE},
    // {"REPLACE",      TK_REPLACE},
    // {"RESTRICT",     TK_RESTRICT},
    // {"ROW",          TK_ROW},
    // {"STATEMENT",    TK_STATEMENT},
    // {"VIEW",         TK_VIEW},
    // {"SEMI",         TK_SEMI},
289
    // {"PARTITIONS",   TK_PARTITIONS},
290
    // {"MODE",         TK_MODE},
H
hzcheng 已提交
291
};
X
Xiaoyu Wang 已提交
292
// clang-format on
H
hzcheng 已提交
293 294 295 296 297 298 299 300 301 302 303 304 305

// Lookup table: isIdChar[b] is 1 when byte b may appear in an unquoted
// identifier (ASCII digits, letters and '_'), 0 otherwise.
//
// The table is sized for every possible byte value so that indexing it with a
// (uint8_t) cast can never read out of bounds; the entries for 0x80..0xFF are
// not listed and are therefore zero-initialized (non-ASCII bytes are not
// identifier characters).
static const char isIdChar[256] = {
    /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
};

H
Haojun Liao 已提交
306
// Hash table keyed by keyword text; each value is a pointer to the matching
// keywordTable entry. Built by doInitKeywordsTable() and reset to NULL by
// taosCleanupKeywordsTable().
static void* keywordHashTable = NULL;
H
hzcheng 已提交
307

S
TD-1057  
Shengliang Guan 已提交
308
static void doInitKeywordsTable(void) {
309
  int numOfEntries = tListLen(keywordTable);
X
Xiaoyu Wang 已提交
310

H
Haojun Liao 已提交
311
  keywordHashTable = taosHashInit(numOfEntries, MurmurHash3_32, true, false);
312
  for (int32_t i = 0; i < numOfEntries; i++) {
S
TD-1057  
Shengliang Guan 已提交
313
    keywordTable[i].len = (uint8_t)strlen(keywordTable[i].name);
314
    void* ptr = &keywordTable[i];
H
Haojun Liao 已提交
315
    taosHashPut(keywordHashTable, keywordTable[i].name, keywordTable[i].len, (void*)&ptr, POINTER_BYTES);
H
hzcheng 已提交
316
  }
317 318
}

wafwerar's avatar
wafwerar 已提交
319
// Once-control passed to taosThreadOnce() together with doInitKeywordsTable()
// by tKeywordCode().
static TdThreadOnce keywordsHashTableInit = PTHREAD_ONCE_INIT;
320

321
static int32_t tKeywordCode(const char* z, int n) {
wafwerar's avatar
wafwerar 已提交
322
  taosThreadOnce(&keywordsHashTableInit, doInitKeywordsTable);
X
Xiaoyu Wang 已提交
323

H
hjxilinx 已提交
324
  char key[512] = {0};
X
Xiaoyu Wang 已提交
325
  if (n > tListLen(key)) {  // too long token, can not be any other token type
326
    return TK_NK_ID;
327
  }
X
Xiaoyu Wang 已提交
328

H
hzcheng 已提交
329 330
  for (int32_t j = 0; j < n; ++j) {
    if (z[j] >= 'a' && z[j] <= 'z') {
331
      key[j] = (char)(z[j] & 0xDF);  // to uppercase and set the null-terminated
H
hzcheng 已提交
332 333 334 335 336
    } else {
      key[j] = z[j];
    }
  }

D
dapan1121 已提交
337
  if (keywordHashTable == NULL) {
338
    return TK_NK_ILLEGAL;
D
dapan1121 已提交
339
  }
H
Haojun Liao 已提交
340

H
Haojun Liao 已提交
341
  SKeyword** pKey = (SKeyword**)taosHashGet(keywordHashTable, key, n);
X
Xiaoyu Wang 已提交
342
  return (pKey != NULL) ? (*pKey)->type : TK_NK_ID;
H
hzcheng 已提交
343 344
}

H
huili 已提交
345
/*
346 347 348
 * Return the length of the token that begins at z[0].
 * Store the token type in *type before returning.
 */
349
uint32_t tGetToken(const char* z, uint32_t* tokenId) {
350
  uint32_t i;
H
hzcheng 已提交
351 352 353 354 355 356 357 358
  switch (*z) {
    case ' ':
    case '\t':
    case '\n':
    case '\f':
    case '\r': {
      for (i = 1; isspace(z[i]); i++) {
      }
359
      *tokenId = TK_NK_SPACE;
H
hzcheng 已提交
360 361 362
      return i;
    }
    case ':': {
363
      *tokenId = TK_NK_COLON;
H
hzcheng 已提交
364 365 366 367 368 369
      return 1;
    }
    case '-': {
      if (z[1] == '-') {
        for (i = 2; z[i] && z[i] != '\n'; i++) {
        }
370
        *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
371
        return i;
372 373 374
      } else if (z[1] == '>') {
        *tokenId = TK_NK_ARROW;
        return 2;
H
hzcheng 已提交
375
      }
X
Xiaoyu Wang 已提交
376
      *tokenId = TK_NK_MINUS;
H
hzcheng 已提交
377 378 379
      return 1;
    }
    case '(': {
380
      *tokenId = TK_NK_LP;
H
hzcheng 已提交
381 382 383
      return 1;
    }
    case ')': {
384
      *tokenId = TK_NK_RP;
H
hzcheng 已提交
385 386 387
      return 1;
    }
    case ';': {
388
      *tokenId = TK_NK_SEMI;
H
hzcheng 已提交
389 390 391
      return 1;
    }
    case '+': {
392
      *tokenId = TK_NK_PLUS;
H
hzcheng 已提交
393 394 395
      return 1;
    }
    case '*': {
396
      *tokenId = TK_NK_STAR;
H
hzcheng 已提交
397 398 399 400
      return 1;
    }
    case '/': {
      if (z[1] != '*' || z[2] == 0) {
401
        *tokenId = TK_NK_SLASH;
H
hzcheng 已提交
402 403 404 405 406
        return 1;
      }
      for (i = 3; z[i] && (z[i] != '/' || z[i - 1] != '*'); i++) {
      }
      if (z[i]) i++;
407
      *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
408 409 410
      return i;
    }
    case '%': {
411
      *tokenId = TK_NK_REM;
H
hzcheng 已提交
412 413 414
      return 1;
    }
    case '=': {
415
      *tokenId = TK_NK_EQ;
H
hzcheng 已提交
416 417 418 419
      return 1 + (z[1] == '=');
    }
    case '<': {
      if (z[1] == '=') {
420
        *tokenId = TK_NK_LE;
H
hzcheng 已提交
421 422
        return 2;
      } else if (z[1] == '>') {
423
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
424 425
        return 2;
      } else if (z[1] == '<') {
426
        *tokenId = TK_NK_LSHIFT;
H
hzcheng 已提交
427 428
        return 2;
      } else {
429
        *tokenId = TK_NK_LT;
H
hzcheng 已提交
430 431 432 433 434
        return 1;
      }
    }
    case '>': {
      if (z[1] == '=') {
435
        *tokenId = TK_NK_GE;
H
hzcheng 已提交
436 437
        return 2;
      } else if (z[1] == '>') {
438
        *tokenId = TK_NK_RSHIFT;
H
hzcheng 已提交
439 440
        return 2;
      } else {
441
        *tokenId = TK_NK_GT;
H
hzcheng 已提交
442 443 444 445 446
        return 1;
      }
    }
    case '!': {
      if (z[1] != '=') {
447
        *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
448 449
        return 2;
      } else {
450
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
451 452 453 454 455
        return 2;
      }
    }
    case '|': {
      if (z[1] != '|') {
456
        *tokenId = TK_NK_BITOR;
H
hzcheng 已提交
457 458
        return 1;
      } else {
459
        *tokenId = TK_NK_CONCAT;
H
hzcheng 已提交
460 461 462 463
        return 2;
      }
    }
    case ',': {
464
      *tokenId = TK_NK_COMMA;
H
hzcheng 已提交
465 466 467
      return 1;
    }
    case '&': {
468
      *tokenId = TK_NK_BITAND;
H
hzcheng 已提交
469 470 471
      return 1;
    }
    case '~': {
472
      *tokenId = TK_NK_BITNOT;
H
hzcheng 已提交
473 474
      return 1;
    }
S
slguan 已提交
475
    case '?': {
476
      *tokenId = TK_NK_QUESTION;
S
slguan 已提交
477 478
      return 1;
    }
479
    case '`':
H
hzcheng 已提交
480 481
    case '\'':
    case '"': {
S
slguan 已提交
482 483
      int  delim = z[0];
      bool strEnd = false;
H
hzcheng 已提交
484
      for (i = 1; z[i]; i++) {
485
        if (delim != '`' && z[i] == '\\') {  // ignore the escaped character that follows this backslash
L
[1292]  
lihui 已提交
486 487 488
          i++;
          continue;
        }
X
Xiaoyu Wang 已提交
489

490
        if (z[i] == delim) {
H
hzcheng 已提交
491 492 493
          if (z[i + 1] == delim) {
            i++;
          } else {
H
huili 已提交
494
            strEnd = true;
H
hzcheng 已提交
495 496 497 498
            break;
          }
        }
      }
X
Xiaoyu Wang 已提交
499

H
hzcheng 已提交
500
      if (z[i]) i++;
H
huili 已提交
501

S
slguan 已提交
502
      if (strEnd) {
X
Xiaoyu Wang 已提交
503
        *tokenId = (delim == '`') ? TK_NK_ID : TK_NK_STRING;
S
slguan 已提交
504 505
        return i;
      }
H
huili 已提交
506

S
slguan 已提交
507
      break;
H
hzcheng 已提交
508 509
    }
    case '.': {
S
slguan 已提交
510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526
      /*
       * handle the the float number with out integer part
       * .123
       * .123e4
       */
      if (isdigit(z[1])) {
        for (i = 2; isdigit(z[i]); i++) {
        }

        if ((z[i] == 'e' || z[i] == 'E') &&
            (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
          i += 2;
          while (isdigit(z[i])) {
            i++;
          }
        }

527
        *tokenId = TK_NK_FLOAT;
S
slguan 已提交
528 529
        return i;
      } else {
530
        *tokenId = TK_NK_DOT;
S
slguan 已提交
531 532 533 534 535 536 537
        return 1;
      }
    }

    case '0': {
      char next = z[1];

X
Xiaoyu Wang 已提交
538
      if (next == 'b') {  // bin number
539
        *tokenId = TK_NK_BIN;
S
slguan 已提交
540 541 542 543 544 545 546 547
        for (i = 2; (z[i] == '0' || z[i] == '1'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
X
Xiaoyu Wang 已提交
548
      } else if (next == 'x') {  // hex number
549
        *tokenId = TK_NK_HEX;
S
slguan 已提交
550 551 552 553 554 555 556 557 558
        for (i = 2; isdigit(z[i]) || (z[i] >= 'a' && z[i] <= 'f') || (z[i] >= 'A' && z[i] <= 'F'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
      }
H
hzcheng 已提交
559 560 561 562 563 564 565 566 567 568
    }
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9': {
569
      *tokenId = TK_NK_INTEGER;
H
hzcheng 已提交
570 571 572
      for (i = 1; isdigit(z[i]); i++) {
      }

H
Haojun Liao 已提交
573
      /* here is the 1u/1a/2s/3m/9y */
X
Xiaoyu Wang 已提交
574 575 576
      if ((z[i] == 'b' || z[i] == 'u' || z[i] == 'a' || z[i] == 's' || z[i] == 'm' || z[i] == 'h' || z[i] == 'd' ||
           z[i] == 'n' || z[i] == 'y' || z[i] == 'w' || z[i] == 'B' || z[i] == 'U' || z[i] == 'A' || z[i] == 'S' ||
           z[i] == 'M' || z[i] == 'H' || z[i] == 'D' || z[i] == 'N' || z[i] == 'Y' || z[i] == 'W') &&
H
hjxilinx 已提交
577
          (isIdChar[(uint8_t)z[i + 1]] == 0)) {
578
        *tokenId = TK_NK_VARIABLE;
H
hzcheng 已提交
579 580 581 582 583 584 585 586 587 588
        i += 1;
        return i;
      }

      int32_t seg = 1;
      while (z[i] == '.' && isdigit(z[i + 1])) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
589
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
590 591 592 593
        seg++;
      }

      if (seg == 4) {  // ip address
594
        *tokenId = TK_NK_IPTOKEN;
H
hzcheng 已提交
595
        return i;
X
Xiaoyu Wang 已提交
596 597
      } else if (seg > 2) {
        break;
H
hzcheng 已提交
598 599 600 601 602 603 604 605
      }

      if ((z[i] == 'e' || z[i] == 'E') &&
          (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
606
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
607 608 609
      }
      return i;
    }
X
Xiaoyu Wang 已提交
610 611 612 613 614 615
    // case '[': {
    //   for (i = 1; z[i] && z[i - 1] != ']'; i++) {
    //   }
    //   *tokenId = TK_NK_ID;
    //   return i;
    // }
H
hzcheng 已提交
616 617 618 619
    case 'T':
    case 't':
    case 'F':
    case 'f': {
X
Xiaoyu Wang 已提交
620
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t)z[i]]; i++) {
H
hzcheng 已提交
621 622 623
      }

      if ((i == 4 && strncasecmp(z, "true", 4) == 0) || (i == 5 && strncasecmp(z, "false", 5) == 0)) {
624
        *tokenId = TK_NK_BOOL;
H
hzcheng 已提交
625 626 627 628
        return i;
      }
    }
    default: {
X
Xiaoyu Wang 已提交
629
      if (((*z & 0x80) != 0) || !isIdChar[(uint8_t)*z]) {
H
hzcheng 已提交
630 631
        break;
      }
X
Xiaoyu Wang 已提交
632
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t)z[i]]; i++) {
H
hzcheng 已提交
633
      }
634
      *tokenId = tKeywordCode(z, i);
H
hzcheng 已提交
635 636 637 638
      return i;
    }
  }

639
  *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
640 641 642
  return 0;
}

X
Xiaoyu Wang 已提交
643 644 645
/*
 * Replace the text covered by `token` inside the string *str with newToken.
 *
 * token->z must point into *str. A new buffer is allocated for the result,
 * *str is redirected to it and the previous buffer is released with
 * taosMemoryFreeClear().
 *
 * Returns a token whose z/n describe newToken inside the new buffer; all other
 * members are zero. If the allocation fails, *str is left untouched and a
 * fully zeroed token (z == NULL, n == 0) is returned.
 */
SToken tscReplaceStrToken(char** str, SToken* token, const char* newToken) {
  char*       src = *str;
  const char* tail = token->z + token->n;  // text that follows the replaced token
  size_t      nsize = strlen(newToken);
  size_t      prefixLen = (size_t)(token->z - src);
  size_t      tailLen = strlen(tail);
  SToken      ntoken = {0};

  char* dst = taosMemoryCalloc(1, prefixLen + nsize + tailLen + 1);
  if (dst == NULL) {
    return ntoken;
  }

  // prefix + replacement + tail; the zeroed allocation supplies the final NUL
  memcpy(dst, src, prefixLen);
  memcpy(dst + prefixLen, newToken, nsize);
  memcpy(dst + prefixLen + nsize, tail, tailLen);

  ntoken.n = (uint32_t)nsize;
  ntoken.z = dst + prefixLen;

  *str = dst;
  taosMemoryFreeClear(src);

  return ntoken;
}

664
SToken tStrGetToken(const char* str, int32_t* i, bool isPrevOptr) {
H
Haojun Liao 已提交
665
  SToken t0 = {0};
S
slguan 已提交
666

H
hzcheng 已提交
667 668
  // here we reach the end of sql string, null-terminated string
  if (str[*i] == 0) {
S
slguan 已提交
669 670
    t0.n = 0;
    return t0;
H
hzcheng 已提交
671 672
  }

673
  // IGNORE TK_NK_SPACE, TK_NK_COMMA, and specified tokens
S
slguan 已提交
674 675 676
  while (1) {
    *i += t0.n;

H
Haojun Liao 已提交
677
    int32_t numOfComma = 0;
X
Xiaoyu Wang 已提交
678
    char    t = str[*i];
H
Haojun Liao 已提交
679 680 681 682
    while (t == ' ' || t == '\n' || t == '\r' || t == '\t' || t == '\f' || t == ',') {
      if (t == ',' && (++numOfComma > 1)) {  // comma only allowed once
        t0.n = 0;
        return t0;
S
slguan 已提交
683
      }
X
Xiaoyu Wang 已提交
684

H
Haojun Liao 已提交
685
      t = str[++(*i)];
S
slguan 已提交
686
    }
H
hzcheng 已提交
687

688
    t0.n = tGetToken(&str[*i], &t0.type);
H
Haojun Liao 已提交
689
    break;
S
slguan 已提交
690

H
Haojun Liao 已提交
691 692
    // not support user specfied ignored symbol list
#if 0
H
Haojun Liao 已提交
693
    bool ignore = false;
S
slguan 已提交
694 695
    for (uint32_t k = 0; k < numOfIgnoreToken; k++) {
      if (t0.type == ignoreTokenTypes[k]) {
H
Haojun Liao 已提交
696
        ignore = true;
S
slguan 已提交
697 698 699 700
        break;
      }
    }

H
Haojun Liao 已提交
701
    if (!ignore) {
S
slguan 已提交
702 703
      break;
    }
H
Haojun Liao 已提交
704
#endif
H
hzcheng 已提交
705 706
  }

707
  if (t0.type == TK_NK_SEMI) {
S
slguan 已提交
708
    t0.n = 0;
D
dapan1121 已提交
709
    t0.type = 0;
S
slguan 已提交
710 711 712 713 714 715 716 717
    return t0;
  }

  uint32_t type = 0;
  int32_t  len;

  // support parse the 'db.tbl' format, notes: There should be no space on either side of the dot!
  if ('.' == str[*i + t0.n]) {
718
    len = tGetToken(&str[*i + t0.n + 1], &type);
S
slguan 已提交
719 720

    // only id and string are valid
721
    if ((TK_NK_STRING != t0.type) && (TK_NK_ID != t0.type)) {
722
      t0.type = TK_NK_ILLEGAL;
S
slguan 已提交
723 724 725 726 727 728 729 730 731
      t0.n = 0;

      return t0;
    }

    t0.n += len + 1;

  } else {
    // support parse the -/+number format
X
Xiaoyu Wang 已提交
732
    if ((isPrevOptr) && (t0.type == TK_NK_MINUS || t0.type == TK_NK_PLUS)) {
733
      len = tGetToken(&str[*i + t0.n], &type);
734
      if (type == TK_NK_INTEGER || type == TK_NK_FLOAT) {
S
slguan 已提交
735 736 737
        t0.type = type;
        t0.n += len;
      }
H
hzcheng 已提交
738 739 740
    }
  }

X
Xiaoyu Wang 已提交
741
  t0.z = (char*)str + (*i);
S
slguan 已提交
742 743 744
  *i += t0.n;

  return t0;
H
hzcheng 已提交
745 746
}

X
Xiaoyu Wang 已提交
747
// True when tKeywordCode() maps the len-byte word at z to anything other than
// TK_NK_ID.
bool taosIsKeyWordToken(const char* z, int32_t len) {
  int32_t code = tKeywordCode(z, len);
  return code != TK_NK_ID;
}
H
Haojun Liao 已提交
748 749

// Release the keyword hash table, if there is one, and reset keywordHashTable.
void taosCleanupKeywordsTable() {
  void* pTable = keywordHashTable;
  if (pTable == NULL) {
    return;
  }

  // clean up only if the compare-exchange hands back the pointer sampled above
  if (atomic_val_compare_exchange_ptr(&keywordHashTable, pTable, 0) == pTable) {
    taosHashCleanup(pTable);
  }
}
755

H
Haojun Liao 已提交
756
// Copy the text of pToken into buf as a NUL-terminated string and return a
// copy of the token whose z points at buf. buf (of len bytes) must have room
// for the token text plus the terminator.
SToken taosTokenDup(SToken* pToken, char* buf, int32_t len) {
  assert(pToken != NULL && buf != NULL && len > pToken->n);

  strncpy(buf, pToken->z, pToken->n);
  buf[pToken->n] = 0;

  SToken dup = *pToken;
  dup.z = buf;

  return dup;
}